上传了源代码项目

develop_MaAo
BagPipeOuO 1 month ago
parent d57b06c39d
commit e231dae4cc

20
.gitignore vendored

@ -0,0 +1,20 @@
# Binaries produced by "make".
afl-analyze
afl-as
afl-clang
afl-clang++
afl-fuzz
afl-g++
afl-gcc
afl-gotcpu
afl-showmap
afl-tmin
as
# Binaries produced by "make -C llvm_mode"
afl-clang-fast
afl-clang-fast++
afl-llvm-pass.so
afl-llvm-rt-32.o
afl-llvm-rt-64.o
afl-llvm-rt.o

@ -0,0 +1,60 @@
language: c
env:
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_STOP_MANUALLY=1
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1
# TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1
before_install:
- sudo apt update
- sudo apt install -y libtool libtool-bin automake bison libglib2.0
# TODO: Look into splitting off some builds using a build matrix.
# TODO: Move this all into a bash script so we don't need to write bash in yaml.
script:
- make
- ./afl-gcc ./test-instr.c -o test-instr-gcc
- mkdir seeds
- echo "" > seeds/nil_seed
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
- rm -r out/*
- ./afl-clang ./test-instr.c -o test-instr-clang
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 2
- make clean
- CC=clang CXX=clang++ make
- cd llvm_mode
# TODO: Build with different versions of clang/LLVM since LLVM passes don't
# have a stable API.
- CC=clang CXX=clang++ LLVM_CONFIG=llvm-config make
- cd ..
- rm -r out/*
- ./afl-clang-fast ./test-instr.c -o test-instr-clang-fast
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
# Test fuzzing libFuzzer targets and trace-pc-guard instrumentation.
- clang -g -fsanitize-coverage=trace-pc-guard ./test-libfuzzer-target.c -c
- clang -c -w llvm_mode/afl-llvm-rt.o.c
- wget https://raw.githubusercontent.com/llvm/llvm-project/main/compiler-rt/lib/fuzzer/afl/afl_driver.cpp
- clang++ afl_driver.cpp afl-llvm-rt.o.o test-libfuzzer-target.o -o test-libfuzzer-target
- timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-libfuzzer-target
- cd qemu_mode
- ./build_qemu_support.sh
- cd ..
- gcc ./test-instr.c -o test-no-instr
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
else timeout --preserve-status 5s ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 12 -p 9

@ -1,4 +1,493 @@
# 开源项目阅读AFL # american fuzzy lop
- 源代码地址https://github.com/google/AFL [![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL)
Originally developed by Michal Zalewski <lcamtuf@google.com>.
See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read
this file.
## 1) Challenges of guided fuzzing
Fuzzing is one of the most powerful and proven strategies for identifying
security issues in real-world software; it is responsible for the vast
majority of remote code execution and privilege escalation bugs found to date
in security-critical software.
Unfortunately, fuzzing is also relatively shallow; blind, random mutations
make it very unlikely to reach certain code paths in the tested code, leaving
some vulnerabilities firmly outside the reach of this technique.
There have been numerous attempts to solve this problem. One of the early
approaches - pioneered by Tavis Ormandy - is corpus distillation. The method
relies on coverage signals to select a subset of interesting seeds from a
massive, high-quality corpus of candidate files, and then fuzz them by
traditional means. The approach works exceptionally well, but requires such
a corpus to be readily available. In addition, block coverage measurements
provide only a very simplistic understanding of program state, and are less
useful for guiding the fuzzing effort in the long haul.
Other, more sophisticated research has focused on techniques such as program
flow analysis ("concolic execution"), symbolic execution, or static analysis.
All these methods are extremely promising in experimental settings, but tend
to suffer from reliability and performance problems in practical uses - and
currently do not offer a viable alternative to "dumb" fuzzing techniques.
## 2) The afl-fuzz approach
American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
but rock-solid instrumentation-guided genetic algorithm. It uses a modified
form of edge coverage to effortlessly pick up subtle, local-scale changes to
program control flow.
Simplifying a bit, the overall algorithm can be summed up as:
1) Load user-supplied initial test cases into the queue,
2) Take next input file from the queue,
3) Attempt to trim the test case to the smallest size that doesn't alter
the measured behavior of the program,
4) Repeatedly mutate the file using a balanced and well-researched variety
of traditional fuzzing strategies,
5) If any of the generated mutations resulted in a new state transition
recorded by the instrumentation, add mutated output as a new entry in the
queue.
6) Go to 2.
The discovered test cases are also periodically culled to eliminate ones that
have been obsoleted by newer, higher-coverage finds; and undergo several other
instrumentation-driven effort minimization steps.
As a side result of the fuzzing process, the tool creates a small,
self-contained corpus of interesting test cases. These are extremely useful
for seeding other, labor- or resource-intensive testing regimes - for example,
for stress-testing browsers, office applications, graphics suites, or
closed-source tools.
The fuzzer is thoroughly tested to deliver out-of-the-box performance far
superior to blind fuzzing or coverage-only tools.
## 3) Instrumenting programs for use with AFL
When source code is available, instrumentation can be injected by a companion
tool that works as a drop-in replacement for gcc or clang in any standard build
process for third-party code.
The instrumentation has a fairly modest performance impact; in conjunction with
other optimizations implemented by afl-fuzz, most programs can be fuzzed as fast
or even faster than possible with traditional tools.
The correct way to recompile the target program may vary depending on the
specifics of the build process, but a nearly-universal approach would be:
```shell
$ CC=/path/to/afl/afl-gcc ./configure
$ make clean all
```
For C++ programs, you'd also want to set `CXX=/path/to/afl/afl-g++`.
The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
clang users may also opt to leverage a higher-performance instrumentation mode,
as described in llvm_mode/README.llvm.
When testing libraries, you need to find or write a simple program that reads
data from stdin or from a file and passes it to the tested library. In such a
case, it is essential to link this executable against a static version of the
instrumented library, or to make sure that the correct .so file is loaded at
runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
build, usually possible via:
```shell
$ CC=/path/to/afl/afl-gcc ./configure --disable-shared
```
Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
automatically enable code hardening options that make it easier to detect
simple memory bugs. Libdislocator, a helper library included with AFL (see
libdislocator/README.dislocator) can help uncover heap corruption issues, too.
PS. ASAN users are advised to review the [notes_for_asan.txt](docs/notes_for_asan.txt) file for important
caveats.
## 4) Instrumenting binary-only apps
When source code is *NOT* available, the fuzzer offers experimental support for
fast, on-the-fly instrumentation of black-box binaries. This is accomplished
with a version of QEMU running in the lesser-known "user space emulation" mode.
QEMU is a project separate from AFL, but you can conveniently build the
feature by doing:
```shell
$ cd qemu_mode
$ ./build_qemu_support.sh
```
For additional instructions and caveats, see qemu_mode/README.qemu.
The mode is approximately 2-5x slower than compile-time instrumentation, is
less conducive to parallelization, and may have some other quirks.
## 5) Choosing initial test cases
To operate correctly, the fuzzer requires one or more starting files that
contain good examples of the input data normally expected by the targeted
application. There are two basic rules:
- Keep the files small. Under 1 kB is ideal, although not strictly necessary.
For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt).
- Use multiple test cases only if they are functionally different from
each other. There is no point in using fifty different vacation photos
to fuzz an image library.
You can find many good examples of starting files in the testcases/ subdirectory
that comes with this tool.
PS. If a large corpus of data is available for screening, you may want to use
the afl-cmin utility to identify a subset of functionally distinct files that
exercise different code paths in the target binary.
## 6) Fuzzing binaries
The fuzzing process itself is carried out by the afl-fuzz utility. This program
requires a read-only directory with initial test cases, a separate place to
store its findings, plus a path to the binary to test.
For target binaries that accept input directly from stdin, the usual syntax is:
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
```
For programs that take input from a file, use '@@' to mark the location in
the target's command line where the input file name should be placed. The
fuzzer will substitute this for you:
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
```
You can also use the -f option to have the mutated data written to a specific
file. This is useful if the program expects a particular file extension or so.
Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command
line) or in a traditional, blind-fuzzer mode (specify -n).
You can use -t and -m to override the default timeout and memory limit for the
executed process; rare examples of targets that may need these settings touched
include compilers and video decoders.
Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt).
Note that afl-fuzz starts by performing an array of deterministic fuzzing
steps, which can take several days, but tend to produce neat test cases. If you
want quick & dirty results right away - akin to zzuf and other traditional
fuzzers - add the -d option to the command line.
## 7) Interpreting output
See the [status_screen.txt](docs/status_screen.txt) file for information on
how to interpret the displayed stats and monitor the health of the process.
Be sure to consult this file especially if any UI elements are highlighted in
red.
The fuzzing process will continue until you press Ctrl-C. At minimum, you want
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
couple of hours to a week or so.
There are three subdirectories created within the output directory and updated
in real time:
- queue/ - test cases for every distinctive execution path, plus all the
starting files given by the user. This is the synthesized corpus
mentioned in section 2.
Before using this corpus for any other purposes, you can shrink
it to a smaller size using the afl-cmin tool. The tool will find
a smaller subset of files offering equivalent edge coverage.
- crashes/ - unique test cases that cause the tested program to receive a
fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are
grouped by the received signal.
- hangs/ - unique test cases that cause the tested program to time out. The
default time limit before something is classified as a hang is
the larger of 1 second and the value of the -t parameter.
The value can be fine-tuned by setting AFL_HANG_TMOUT, but this
is rarely necessary.
Crashes and hangs are considered "unique" if the associated execution paths
involve any state transitions not seen in previously-recorded faults. If a
single bug can be reached in multiple ways, there will be some count inflation
early in the process, but this should quickly taper off.
The file names for crashes and hangs are correlated with parent, non-faulting
queue entries. This should help with debugging.
When you can't reproduce a crash found by afl-fuzz, the most likely cause is
that you are not setting the same memory limit as used by the tool. Try:
```shell
$ LIMIT_MB=50
$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
```
Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
also change -Sv to -Sd.
Any existing output directory can also be used to resume aborted jobs; try:
```shell
$ ./afl-fuzz -i- -o existing_output_dir [...etc...]
```
If you have gnuplot installed, you can also generate some pretty graphs for any
active fuzzing task using afl-plot. For an example of what this looks like,
see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
## 8) Parallelized fuzzing
Every instance of afl-fuzz takes up roughly one core. This means that on
multi-core systems, parallelization is necessary to fully utilize the hardware.
For tips on how to fuzz a common target on multiple cores or multiple networked
machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt).
The parallel fuzzing mode also offers a simple way for interfacing AFL to other
fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.
## 9) Fuzzer dictionaries
By default, the afl-fuzz mutation engine is optimized for compact data formats -
say, images, multimedia, compressed data, regular expression syntax, or shell
scripts. It is somewhat less suited for languages with particularly verbose and
redundant verbiage - notably including HTML, SQL, or JavaScript.
To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to
seed the fuzzing process with an optional dictionary of language keywords,
magic headers, or other special tokens associated with the targeted data type
-- and use that to reconstruct the underlying grammar on the go:
[http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html)
To use this feature, you first need to create a dictionary in one of the two
formats discussed in dictionaries/README.dictionaries; and then point the fuzzer
to it via the -x option in the command line.
(Several common dictionaries are already provided in that subdirectory, too.)
There is no way to provide more structured descriptions of the underlying
syntax, but the fuzzer will likely figure out some of this based on the
instrumentation feedback alone. This actually works in practice, say:
[http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html)
PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
existing syntax tokens in the input corpus by watching the instrumentation
very closely during deterministic byte flips. This works for some types of
parsers and grammars, but isn't nearly as good as the -x mode.
If a dictionary is really hard to come by, another option is to let AFL run
for a while, and then use the token capture library that comes as a companion
utility with AFL. For that, see libtokencap/README.tokencap.
## 10) Crash triage
The coverage-based grouping of crashes usually produces a small data set that
can be quickly triaged manually or with a very simple GDB or Valgrind script.
Every crash is also traceable to its parent non-crashing test case in the
queue, making it easier to diagnose faults.
Having said that, it's important to acknowledge that some fuzzing crashes can be
difficult to quickly evaluate for exploitability without a lot of debugging and
code analysis work. To assist with this task, afl-fuzz supports a unique
"crash exploration" mode enabled with the -C flag.
In this mode, the fuzzer takes one or more crashing test cases as the input,
and uses its feedback-driven fuzzing strategies to very quickly enumerate all
code paths that can be reached in the program while keeping it in the
crashing state.
Mutations that do not result in a crash are rejected; so are any changes that
do not affect the execution path.
The output is a small corpus of files that can be very rapidly examined to see
what degree of control the attacker has over the faulting address, or whether
it is possible to get past an initial out-of-bounds read - and see what lies
beneath.
Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
can be operated in a very simple way:
```shell
$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
```
The tool works with crashing and non-crashing test cases alike. In the crash
mode, it will happily accept instrumented and non-instrumented binaries. In the
non-crashing mode, the minimizer relies on standard AFL instrumentation to make
the file simpler without altering the execution path.
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
afl-fuzz.
Another recent addition to AFL is the afl-analyze tool. It takes an input
file, attempts to sequentially flip bytes, and observes the behavior of the
tested program. It then color-codes the input based on which sections appear to
be critical, and which are not; while not bulletproof, it can often offer quick
insights into complex file formats. More info about its operation can be found
near the end of [technical_details.txt](docs/technical_details.txt).
## 11) Going beyond crashes
Fuzzing is a wonderful and underutilized technique for discovering non-crashing
design and implementation errors, too. Quite a few interesting bugs have been
found by modifying the target programs to call abort() when, say:
- Two bignum libraries produce different outputs when given the same
fuzzer-generated input,
- An image library produces different outputs when asked to decode the same
input image several times in a row,
- A serialization / deserialization library fails to produce stable outputs
when iteratively serializing and deserializing fuzzer-supplied data,
- A compression library produces an output inconsistent with the input file
when asked to compress and then decompress a particular blob.
Implementing these or similar sanity checks usually takes very little time;
if you are the maintainer of a particular package, you can make this code
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).
## 12) Common-sense risks
Please keep in mind that, similarly to many other computationally-intensive
tasks, fuzzing may put strain on your hardware and on the OS. In particular:
- Your CPU will run hot and will need adequate cooling. In most cases, if
cooling is insufficient or stops working properly, CPU speeds will be
automatically throttled. That said, especially when fuzzing on less
suitable hardware (laptops, smartphones, etc), it's not entirely impossible
for something to blow up.
- Targeted programs may end up erratically grabbing gigabytes of memory or
filling up disk space with junk files. AFL tries to enforce basic memory
limits, but can't prevent each and every possible mishap. The bottom line
is that you shouldn't be fuzzing on systems where the prospect of data loss
is not an acceptable risk.
- Fuzzing involves billions of reads and writes to the filesystem. On modern
systems, this will usually be heavily cached, resulting in fairly modest
"physical" I/O - but there are many factors that may alter this equation.
It is your responsibility to monitor for potential trouble; with very heavy
I/O, the lifespan of many HDDs and SSDs may be reduced.
A good way to monitor disk I/O on Linux is the 'iostat' command:
```shell
$ iostat -d 3 -x -k [...optional disk ID...]
```
## 13) Known limitations & areas for improvement
Here are some of the most important caveats for AFL:
- AFL detects faults by checking for the first spawned process dying due to
a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
these signals may need to have the relevant code commented out. In the same
vein, faults in child processes spawned by the fuzzed target may evade
detection unless you manually add some code to catch that.
- As with any other brute-force tool, the fuzzer offers limited coverage if
encryption, checksums, cryptographic signatures, or compression are used to
wholly wrap the actual data format to be tested.
To work around this, you can comment out the relevant checks (see
experimental/libpng_no_checksum/ for inspiration); if this is not possible,
you can also write a postprocessor, as explained in
experimental/post_library/.
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt)
for tips.
- There is no direct support for fuzzing network services, background
daemons, or interactive apps that require UI interaction to work. You may
need to make simple code changes to make them behave in a more traditional
way. Preeny may offer a relatively simple option, too - see:
https://github.com/zardus/preeny
Some useful tips for modifying network-based services can also be found at:
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
- AFL doesn't output human-readable coverage data. If you want to monitor
coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov
- Occasionally, sentient machines rise against their creators. If this
happens to you, please consult http://lcamtuf.coredump.cx/prep/.
Beyond this, see INSTALL for platform-specific tips.
## 14) Special thanks
Many of the improvements to afl-fuzz wouldn't be possible without feedback,
bug reports, or patches from:
```
Jann Horn Hanno Boeck
Felix Groebert Jakub Wilk
Richard W. M. Jones Alexander Cherepanov
Tom Ritter Hovik Manucharyan
Sebastian Roschke Eberhard Mattes
Padraig Brady Ben Laurie
@dronesec Luca Barbato
Tobias Ospelt Thomas Jarosch
Martin Carpenter Mudge Zatko
Joe Zbiciak Ryan Govostes
Michael Rash William Robinet
Jonathan Gray Filipe Cabecinhas
Nico Weber Jodie Cunningham
Andrew Griffiths Parker Thompson
Jonathan Neuschäfer Tyler Nighswander
Ben Nagy Samir Aguiar
Aidan Thornton Aleksandar Nikolich
Sam Hakim Laszlo Szekeres
David A. Wheeler Turo Lamminen
Andreas Stieger Richard Godbee
Louis Dassy teor2345
Alex Moneger Dmitry Vyukov
Keegan McAllister Kostya Serebryany
Richo Healey Martijn Bogaard
rc0r Jonathan Foote
Christian Holler Dominique Pelle
Jacek Wielemborek Leo Barnes
Jeremy Barnes Jeff Trull
Guillaume Endignoux ilovezfs
Daniel Godas-Lopez Franjo Ivancic
Austin Seipp Daniel Komaromy
Daniel Binderman Jonathan Metzman
Vegard Nossum Jan Kneschke
Kurt Roeckx Marcel Bohme
Van-Thuan Pham Abhik Roychoudhury
Joshua J. Drake Toby Hutton
Rene Freingruber Sergey Davidoff
Sami Liedes Craig Young
Andrzej Jackowski Daniel Hodson
```
Thank you!
## 15) Contact
Questions? Concerns? Bug reports? Please use GitHub.
There is also a mailing list for the project; to join, send a mail to
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse
archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users).

@ -1,493 +0,0 @@
# american fuzzy lop
[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL)
Originally developed by Michal Zalewski <lcamtuf@google.com>.
See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read
this file.
## 1) Challenges of guided fuzzing
Fuzzing is one of the most powerful and proven strategies for identifying
security issues in real-world software; it is responsible for the vast
majority of remote code execution and privilege escalation bugs found to date
in security-critical software.
Unfortunately, fuzzing is also relatively shallow; blind, random mutations
make it very unlikely to reach certain code paths in the tested code, leaving
some vulnerabilities firmly outside the reach of this technique.
There have been numerous attempts to solve this problem. One of the early
approaches - pioneered by Tavis Ormandy - is corpus distillation. The method
relies on coverage signals to select a subset of interesting seeds from a
massive, high-quality corpus of candidate files, and then fuzz them by
traditional means. The approach works exceptionally well, but requires such
a corpus to be readily available. In addition, block coverage measurements
provide only a very simplistic understanding of program state, and are less
useful for guiding the fuzzing effort in the long haul.
Other, more sophisticated research has focused on techniques such as program
flow analysis ("concolic execution"), symbolic execution, or static analysis.
All these methods are extremely promising in experimental settings, but tend
to suffer from reliability and performance problems in practical uses - and
currently do not offer a viable alternative to "dumb" fuzzing techniques.
## 2) The afl-fuzz approach
American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
but rock-solid instrumentation-guided genetic algorithm. It uses a modified
form of edge coverage to effortlessly pick up subtle, local-scale changes to
program control flow.
Simplifying a bit, the overall algorithm can be summed up as:
1) Load user-supplied initial test cases into the queue,
2) Take next input file from the queue,
3) Attempt to trim the test case to the smallest size that doesn't alter
the measured behavior of the program,
4) Repeatedly mutate the file using a balanced and well-researched variety
of traditional fuzzing strategies,
5) If any of the generated mutations resulted in a new state transition
recorded by the instrumentation, add mutated output as a new entry in the
queue.
6) Go to 2.
The discovered test cases are also periodically culled to eliminate ones that
have been obsoleted by newer, higher-coverage finds; and undergo several other
instrumentation-driven effort minimization steps.
As a side result of the fuzzing process, the tool creates a small,
self-contained corpus of interesting test cases. These are extremely useful
for seeding other, labor- or resource-intensive testing regimes - for example,
for stress-testing browsers, office applications, graphics suites, or
closed-source tools.
The fuzzer is thoroughly tested to deliver out-of-the-box performance far
superior to blind fuzzing or coverage-only tools.
## 3) Instrumenting programs for use with AFL
When source code is available, instrumentation can be injected by a companion
tool that works as a drop-in replacement for gcc or clang in any standard build
process for third-party code.
The instrumentation has a fairly modest performance impact; in conjunction with
other optimizations implemented by afl-fuzz, most programs can be fuzzed as fast
or even faster than possible with traditional tools.
The correct way to recompile the target program may vary depending on the
specifics of the build process, but a nearly-universal approach would be:
```shell
$ CC=/path/to/afl/afl-gcc ./configure
$ make clean all
```
For C++ programs, you'd also want to set `CXX=/path/to/afl/afl-g++`.
The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
clang users may also opt to leverage a higher-performance instrumentation mode,
as described in llvm_mode/README.llvm.
When testing libraries, you need to find or write a simple program that reads
data from stdin or from a file and passes it to the tested library. In such a
case, it is essential to link this executable against a static version of the
instrumented library, or to make sure that the correct .so file is loaded at
runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
build, usually possible via:
```shell
$ CC=/path/to/afl/afl-gcc ./configure --disable-shared
```
Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
automatically enable code hardening options that make it easier to detect
simple memory bugs. Libdislocator, a helper library included with AFL (see
libdislocator/README.dislocator) can help uncover heap corruption issues, too.
PS. ASAN users are advised to review the [notes_for_asan.txt](docs/notes_for_asan.txt) file for important
caveats.
## 4) Instrumenting binary-only apps
When source code is *NOT* available, the fuzzer offers experimental support for
fast, on-the-fly instrumentation of black-box binaries. This is accomplished
with a version of QEMU running in the lesser-known "user space emulation" mode.
QEMU is a project separate from AFL, but you can conveniently build the
feature by doing:
```shell
$ cd qemu_mode
$ ./build_qemu_support.sh
```
For additional instructions and caveats, see qemu_mode/README.qemu.
The mode is approximately 2-5x slower than compile-time instrumentation, is
less conducive to parallelization, and may have some other quirks.
## 5) Choosing initial test cases
To operate correctly, the fuzzer requires one or more starting files that
contain a good example of the input data normally expected by the targeted
application. There are two basic rules:
- Keep the files small. Under 1 kB is ideal, although not strictly necessary.
For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt).
- Use multiple test cases only if they are functionally different from
each other. There is no point in using fifty different vacation photos
to fuzz an image library.
You can find many good examples of starting files in the testcases/ subdirectory
that comes with this tool.
PS. If a large corpus of data is available for screening, you may want to use
the afl-cmin utility to identify a subset of functionally distinct files that
exercise different code paths in the target binary.
## 6) Fuzzing binaries
The fuzzing process itself is carried out by the afl-fuzz utility. This program
requires a read-only directory with initial test cases, a separate place to
store its findings, plus a path to the binary to test.
For target binaries that accept input directly from stdin, the usual syntax is:
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
```
For programs that take input from a file, use '@@' to mark the location in
the target's command line where the input file name should be placed. The
fuzzer will substitute this for you:
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
```
You can also use the -f option to have the mutated data written to a specific
file. This is useful if the program expects a particular file extension or so.
Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command
line) or in a traditional, blind-fuzzer mode (specify -n).
You can use -t and -m to override the default timeout and memory limit for the
executed process; rare examples of targets that may need these settings touched
include compilers and video decoders.
Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt).
Note that afl-fuzz starts by performing an array of deterministic fuzzing
steps, which can take several days, but tend to produce neat test cases. If you
want quick & dirty results right away - akin to zzuf and other traditional
fuzzers - add the -d option to the command line.
## 7) Interpreting output
See the [status_screen.txt](docs/status_screen.txt) file for information on
how to interpret the displayed stats and monitor the health of the process.
Be sure to consult this file especially if any UI elements are highlighted in
red.
The fuzzing process will continue until you press Ctrl-C. At minimum, you want
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
couple of hours to a week or so.
There are three subdirectories created within the output directory and updated
in real time:
- queue/ - test cases for every distinctive execution path, plus all the
starting files given by the user. This is the synthesized corpus
mentioned in section 2.
Before using this corpus for any other purposes, you can shrink
it to a smaller size using the afl-cmin tool. The tool will find
a smaller subset of files offering equivalent edge coverage.
- crashes/ - unique test cases that cause the tested program to receive a
fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are
grouped by the received signal.
- hangs/ - unique test cases that cause the tested program to time out. The
default time limit before something is classified as a hang is
the larger of 1 second and the value of the -t parameter.
The value can be fine-tuned by setting AFL_HANG_TMOUT, but this
is rarely necessary.
Crashes and hangs are considered "unique" if the associated execution paths
involve any state transitions not seen in previously-recorded faults. If a
single bug can be reached in multiple ways, there will be some count inflation
early in the process, but this should quickly taper off.
The file names for crashes and hangs are correlated with parent, non-faulting
queue entries. This should help with debugging.
When you can't reproduce a crash found by afl-fuzz, the most likely cause is
that you are not setting the same memory limit as used by the tool. Try:
```shell
$ LIMIT_MB=50
$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
```
Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
also change -Sv to -Sd.
Any existing output directory can be also used to resume aborted jobs; try:
```shell
$ ./afl-fuzz -i- -o existing_output_dir [...etc...]
```
If you have gnuplot installed, you can also generate some pretty graphs for any
active fuzzing task using afl-plot. For an example of what this looks like,
see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
## 8) Parallelized fuzzing
Every instance of afl-fuzz takes up roughly one core. This means that on
multi-core systems, parallelization is necessary to fully utilize the hardware.
For tips on how to fuzz a common target on multiple cores or multiple networked
machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt).
The parallel fuzzing mode also offers a simple way for interfacing AFL to other
fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.
## 9) Fuzzer dictionaries
By default, the afl-fuzz mutation engine is optimized for compact data formats -
say, images, multimedia, compressed data, regular expression syntax, or shell
scripts. It is somewhat less suited for languages with particularly verbose and
redundant verbiage - notably including HTML, SQL, or JavaScript.
To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to
seed the fuzzing process with an optional dictionary of language keywords,
magic headers, or other special tokens associated with the targeted data type
-- and use that to reconstruct the underlying grammar on the go:
[http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html)
To use this feature, you first need to create a dictionary in one of the two
formats discussed in dictionaries/README.dictionaries; and then point the fuzzer
to it via the -x option in the command line.
(Several common dictionaries are already provided in that subdirectory, too.)
There is no way to provide more structured descriptions of the underlying
syntax, but the fuzzer will likely figure out some of this based on the
instrumentation feedback alone. This actually works in practice, say:
[http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html)
PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
existing syntax tokens in the input corpus by watching the instrumentation
very closely during deterministic byte flips. This works for some types of
parsers and grammars, but isn't nearly as good as the -x mode.
If a dictionary is really hard to come by, another option is to let AFL run
for a while, and then use the token capture library that comes as a companion
utility with AFL. For that, see libtokencap/README.tokencap.
## 10) Crash triage
The coverage-based grouping of crashes usually produces a small data set that
can be quickly triaged manually or with a very simple GDB or Valgrind script.
Every crash is also traceable to its parent non-crashing test case in the
queue, making it easier to diagnose faults.
Having said that, it's important to acknowledge that some fuzzing crashes can be
difficult to quickly evaluate for exploitability without a lot of debugging and
code analysis work. To assist with this task, afl-fuzz supports a very unique
"crash exploration" mode enabled with the -C flag.
In this mode, the fuzzer takes one or more crashing test cases as the input,
and uses its feedback-driven fuzzing strategies to very quickly enumerate all
code paths that can be reached in the program while keeping it in the
crashing state.
Mutations that do not result in a crash are rejected; so are any changes that
do not affect the execution path.
The output is a small corpus of files that can be very rapidly examined to see
what degree of control the attacker has over the faulting address, or whether
it is possible to get past an initial out-of-bounds read - and see what lies
beneath.
Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
can be operated in a very simple way:
```shell
$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
```
The tool works with crashing and non-crashing test cases alike. In the crash
mode, it will happily accept instrumented and non-instrumented binaries. In the
non-crashing mode, the minimizer relies on standard AFL instrumentation to make
the file simpler without altering the execution path.
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
afl-fuzz.
Another recent addition to AFL is the afl-analyze tool. It takes an input
file, attempts to sequentially flip bytes, and observes the behavior of the
tested program. It then color-codes the input based on which sections appear to
be critical, and which are not; while not bulletproof, it can often offer quick
insights into complex file formats. More info about its operation can be found
near the end of [technical_details.txt](docs/technical_details.txt).
## 11) Going beyond crashes
Fuzzing is a wonderful and underutilized technique for discovering non-crashing
design and implementation errors, too. Quite a few interesting bugs have been
found by modifying the target programs to call abort() when, say:
- Two bignum libraries produce different outputs when given the same
fuzzer-generated input,
- An image library produces different outputs when asked to decode the same
input image several times in a row,
- A serialization / deserialization library fails to produce stable outputs
when iteratively serializing and deserializing fuzzer-supplied data,
- A compression library produces an output inconsistent with the input file
when asked to compress and then decompress a particular blob.
Implementing these or similar sanity checks usually takes very little time;
if you are the maintainer of a particular package, you can make this code
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).
## 12) Common-sense risks
Please keep in mind that, similarly to many other computationally-intensive
tasks, fuzzing may put strain on your hardware and on the OS. In particular:
- Your CPU will run hot and will need adequate cooling. In most cases, if
cooling is insufficient or stops working properly, CPU speeds will be
automatically throttled. That said, especially when fuzzing on less
suitable hardware (laptops, smartphones, etc), it's not entirely impossible
for something to blow up.
- Targeted programs may end up erratically grabbing gigabytes of memory or
filling up disk space with junk files. AFL tries to enforce basic memory
limits, but can't prevent each and every possible mishap. The bottom line
is that you shouldn't be fuzzing on systems where the prospect of data loss
is not an acceptable risk.
- Fuzzing involves billions of reads and writes to the filesystem. On modern
systems, this will be usually heavily cached, resulting in fairly modest
"physical" I/O - but there are many factors that may alter this equation.
It is your responsibility to monitor for potential trouble; with very heavy
I/O, the lifespan of many HDDs and SSDs may be reduced.
A good way to monitor disk I/O on Linux is the 'iostat' command:
```shell
$ iostat -d 3 -x -k [...optional disk ID...]
```
## 13) Known limitations & areas for improvement
Here are some of the most important caveats for AFL:
- AFL detects faults by checking for the first spawned process dying due to
a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
these signals may need to have the relevant code commented out. In the same
vein, faults in child processes spawned by the fuzzed target may evade
detection unless you manually add some code to catch that.
- As with any other brute-force tool, the fuzzer offers limited coverage if
encryption, checksums, cryptographic signatures, or compression are used to
wholly wrap the actual data format to be tested.
To work around this, you can comment out the relevant checks (see
experimental/libpng_no_checksum/ for inspiration); if this is not possible,
you can also write a postprocessor, as explained in
experimental/post_library/.
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt)
for tips.
- There is no direct support for fuzzing network services, background
daemons, or interactive apps that require UI interaction to work. You may
need to make simple code changes to make them behave in a more traditional
way. Preeny may offer a relatively simple option, too - see:
https://github.com/zardus/preeny
Some useful tips for modifying network-based services can be also found at:
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
- AFL doesn't output human-readable coverage data. If you want to monitor
coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov
- Occasionally, sentient machines rise against their creators. If this
happens to you, please consult http://lcamtuf.coredump.cx/prep/.
Beyond this, see INSTALL for platform-specific tips.
## 14) Special thanks
Many of the improvements to afl-fuzz wouldn't be possible without feedback,
bug reports, or patches from:
```
Jann Horn Hanno Boeck
Felix Groebert Jakub Wilk
Richard W. M. Jones Alexander Cherepanov
Tom Ritter Hovik Manucharyan
Sebastian Roschke Eberhard Mattes
Padraig Brady Ben Laurie
@dronesec Luca Barbato
Tobias Ospelt Thomas Jarosch
Martin Carpenter Mudge Zatko
Joe Zbiciak Ryan Govostes
Michael Rash William Robinet
Jonathan Gray Filipe Cabecinhas
Nico Weber Jodie Cunningham
Andrew Griffiths Parker Thompson
Jonathan Neuschäfer Tyler Nighswander
Ben Nagy Samir Aguiar
Aidan Thornton Aleksandar Nikolich
Sam Hakim Laszlo Szekeres
David A. Wheeler Turo Lamminen
Andreas Stieger Richard Godbee
Louis Dassy teor2345
Alex Moneger Dmitry Vyukov
Keegan McAllister Kostya Serebryany
Richo Healey Martijn Bogaard
rc0r Jonathan Foote
Christian Holler Dominique Pelle
Jacek Wielemborek Leo Barnes
Jeremy Barnes Jeff Trull
Guillaume Endignoux ilovezfs
Daniel Godas-Lopez Franjo Ivancic
Austin Seipp Daniel Komaromy
Daniel Binderman Jonathan Metzman
Vegard Nossum Jan Kneschke
Kurt Roeckx Marcel Bohme
Van-Thuan Pham Abhik Roychoudhury
Joshua J. Drake Toby Hutton
Rene Freingruber Sergey Davidoff
Sami Liedes Craig Young
Andrzej Jackowski Daniel Hodson
```
Thank you!
## 15) Contact
Questions? Concerns? Bug reports? Please use GitHub.
There is also a mailing list for the project; to join, send a mail to
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse
archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users).

75
as

@ -1,75 +0,0 @@
#!/bin/sh
#
# american fuzzy lop - clang assembly normalizer
# ----------------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
# The idea for this wrapper comes from Ryan Govostes.
#
# Copyright 2013, 2014 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This 'as' wrapper should allow you to instrument unruly, hand-written
# assembly with afl-as.
#
# Usage:
#
# export AFL_REAL_PATH=/path/to/directory/with/afl-as/
# AFL_PATH=/path/to/this/directory/ make clean all
#
# How it works:
#
#   1. The last command-line argument is taken to be the input assembly file;
#      every preceding argument is forwarded to afl-as untouched.
#   2. The input file is run through "clang -cc1as" and the resulting
#      assembly listing is written to a temporary file.
#   3. afl-as is invoked on that temporary file with the forwarded arguments.
#
# The exit status is that of the first failing step (clang or afl-as), or 1
# for usage / environment errors.

# The compiler driver always passes at least one option plus the input file.
if [ "$#" -lt "2" ]; then
  echo "[-] Error: this utility can't be called directly." 1>&2
  exit 1
fi

if [ "$AFL_REAL_PATH" = "" ]; then
  echo "[-] Error: AFL_REAL_PATH not set!" 1>&2
  exit 1
fi

if [ ! -x "$AFL_REAL_PATH/afl-as" ]; then
  echo "[-] Error: AFL_REAL_PATH does not contain the 'afl-as' binary." 1>&2
  exit 1
fi

# Make sure a stray value inherited from the environment cannot leak in.
unset __AFL_FNAME

# Split the arguments into "everything but the last" (kept as the positional
# parameters, so each argument survives intact even if it contains whitespace)
# and "the last" (the input file name). The word list of the 'for' loop is
# expanded once, before the first iteration, so resetting the positional
# parameters inside the loop body does not disturb the iteration.
__AFL_ARGC="$#"
__AFL_IDX=0

for __AFL_ARG in "$@"; do

  if [ "$__AFL_IDX" -eq 0 ]; then
    set --
  fi

  __AFL_IDX=$((__AFL_IDX + 1))

  if [ "$__AFL_IDX" -eq "$__AFL_ARGC" ]; then
    __AFL_FNAME="$__AFL_ARG"
  else
    set -- "$@" "$__AFL_ARG"
  fi

done

# Fall back to /tmp when TMPDIR is unset or empty.
TMPDIR="${TMPDIR:-/tmp}"

# The template is quoted so that a TMPDIR containing whitespace or glob
# characters is passed to mktemp as a single, literal path.
TMPFILE=$(mktemp "$TMPDIR/.afl-XXXXXXXXXX.s")

test "$TMPFILE" = "" && exit 1

# Normalize the hand-written assembly into a plain listing.
clang -cc1as -filetype asm -output-asm-variant 0 "${__AFL_FNAME}" >"$TMPFILE"

ERR="$?"

if [ ! "$ERR" = "0" ]; then
  rm -f "$TMPFILE"
  exit "$ERR"
fi

# Hand the normalized listing to afl-as together with the original options.
"$AFL_REAL_PATH/afl-as" "$@" "$TMPFILE"

ERR="$?"

rm -f "$TMPFILE"

exit "$ERR"

@ -0,0 +1,43 @@
================
AFL dictionaries
================
(See ../docs/README for the general instruction manual.)
This subdirectory contains a set of dictionaries that can be used in
conjunction with the -x option to allow the fuzzer to effortlessly explore the
grammar of some of the more verbose data formats or languages. The basic
principle behind the operation of fuzzer dictionaries is outlined in section 9
of the "main" README for the project.
Custom dictionaries can be added at will. They should consist of a
reasonably-sized set of rudimentary syntax units that the fuzzer will then try
to cobble together in various ways. Snippets between 2 and 16 bytes are usually
the sweet spot.
Custom dictionaries can be created in two ways:
- By creating a new directory and placing each token in a separate file, in
which case, there is no need to escape or otherwise format the data.
- By creating a flat text file where tokens are listed one per line in the
format of name="value". The alphanumeric name is ignored and can be omitted,
although it is a convenient way to document the meaning of a particular
token. The value must appear in quotes, with hex escaping (\xNN) applied to
all non-printable, high-bit, or otherwise problematic characters (\\ and \"
shorthands are recognized, too).
The fuzzer auto-selects the appropriate mode depending on whether the -x
parameter is a file or a directory.
In the file mode, every name field can be optionally followed by @<num>, e.g.:
keyword_foo@1 = "foo"
Such entries will be loaded only if the requested dictionary level is equal to or
higher than this number. The default level is zero; a higher value can be set
by appending @<num> to the dictionary file name, like so:
-x path/to/dictionary.dct@2
Good examples of dictionaries can be found in xml.dict and png.dict.

@ -0,0 +1,18 @@
#
# AFL dictionary for GIF images
# -----------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_87a="87a"
header_89a="89a"
header_gif="GIF"
marker_2c=","
marker_3b=";"
section_2101="!\x01\x12"
section_21f9="!\xf9\x04"
section_21fe="!\xfe"
section_21ff="!\xff\x11"

@ -0,0 +1,160 @@
#
# AFL dictionary for HTML parsers (tags only)
# -------------------------------------------
#
# A basic collection of HTML tags likely to matter to HTML parsers. Does *not*
# include any attributes or attribute values.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
tag_a="<a>"
tag_abbr="<abbr>"
tag_acronym="<acronym>"
tag_address="<address>"
tag_annotation_xml="<annotation-xml>"
tag_applet="<applet>"
tag_area="<area>"
tag_article="<article>"
tag_aside="<aside>"
tag_audio="<audio>"
tag_b="<b>"
tag_base="<base>"
tag_basefont="<basefont>"
tag_bdi="<bdi>"
tag_bdo="<bdo>"
tag_bgsound="<bgsound>"
tag_big="<big>"
tag_blink="<blink>"
tag_blockquote="<blockquote>"
tag_body="<body>"
tag_br="<br>"
tag_button="<button>"
tag_canvas="<canvas>"
tag_caption="<caption>"
tag_center="<center>"
tag_cite="<cite>"
tag_code="<code>"
tag_col="<col>"
tag_colgroup="<colgroup>"
tag_data="<data>"
tag_datalist="<datalist>"
tag_dd="<dd>"
tag_del="<del>"
tag_desc="<desc>"
tag_details="<details>"
tag_dfn="<dfn>"
tag_dir="<dir>"
tag_div="<div>"
tag_dl="<dl>"
tag_dt="<dt>"
tag_em="<em>"
tag_embed="<embed>"
tag_fieldset="<fieldset>"
tag_figcaption="<figcaption>"
tag_figure="<figure>"
tag_font="<font>"
tag_footer="<footer>"
tag_foreignobject="<foreignobject>"
tag_form="<form>"
tag_frame="<frame>"
tag_frameset="<frameset>"
tag_h1="<h1>"
tag_h2="<h2>"
tag_h3="<h3>"
tag_h4="<h4>"
tag_h5="<h5>"
tag_h6="<h6>"
tag_head="<head>"
tag_header="<header>"
tag_hgroup="<hgroup>"
tag_hr="<hr>"
tag_html="<html>"
tag_i="<i>"
tag_iframe="<iframe>"
tag_image="<image>"
tag_img="<img>"
tag_input="<input>"
tag_ins="<ins>"
tag_isindex="<isindex>"
tag_kbd="<kbd>"
tag_keygen="<keygen>"
tag_label="<label>"
tag_legend="<legend>"
tag_li="<li>"
tag_link="<link>"
tag_listing="<listing>"
tag_main="<main>"
tag_malignmark="<malignmark>"
tag_map="<map>"
tag_mark="<mark>"
tag_marquee="<marquee>"
tag_math="<math>"
tag_menu="<menu>"
tag_menuitem="<menuitem>"
tag_meta="<meta>"
tag_meter="<meter>"
tag_mglyph="<mglyph>"
tag_mi="<mi>"
tag_mn="<mn>"
tag_mo="<mo>"
tag_ms="<ms>"
tag_mtext="<mtext>"
tag_multicol="<multicol>"
tag_nav="<nav>"
tag_nextid="<nextid>"
tag_nobr="<nobr>"
tag_noembed="<noembed>"
tag_noframes="<noframes>"
tag_noscript="<noscript>"
tag_object="<object>"
tag_ol="<ol>"
tag_optgroup="<optgroup>"
tag_option="<option>"
tag_output="<output>"
tag_p="<p>"
tag_param="<param>"
tag_plaintext="<plaintext>"
tag_pre="<pre>"
tag_progress="<progress>"
tag_q="<q>"
tag_rb="<rb>"
tag_rp="<rp>"
tag_rt="<rt>"
tag_rtc="<rtc>"
tag_ruby="<ruby>"
tag_s="<s>"
tag_samp="<samp>"
tag_script="<script>"
tag_section="<section>"
tag_select="<select>"
tag_small="<small>"
tag_source="<source>"
tag_spacer="<spacer>"
tag_span="<span>"
tag_strike="<strike>"
tag_strong="<strong>"
tag_style="<style>"
tag_sub="<sub>"
tag_summary="<summary>"
tag_sup="<sup>"
tag_svg="<svg>"
tag_table="<table>"
tag_tbody="<tbody>"
tag_td="<td>"
tag_template="<template>"
tag_textarea="<textarea>"
tag_tfoot="<tfoot>"
tag_th="<th>"
tag_thead="<thead>"
tag_time="<time>"
tag_title="<title>"
tag_tr="<tr>"
tag_track="<track>"
tag_tt="<tt>"
tag_u="<u>"
tag_ul="<ul>"
tag_var="<var>"
tag_video="<video>"
tag_wbr="<wbr>"
tag_xmp="<xmp>"

@ -0,0 +1,22 @@
#
# AFL dictionary for JPEG images
# ------------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_jfif="JFIF\x00"
header_jfxx="JFXX\x00"
section_ffc0="\xff\xc0"
section_ffc2="\xff\xc2"
section_ffc4="\xff\xc4"
section_ffd0="\xff\xd0"
section_ffd8="\xff\xd8"
section_ffd9="\xff\xd9"
section_ffda="\xff\xda"
section_ffdb="\xff\xdb"
section_ffdd="\xff\xdd"
section_ffe0="\xff\xe0"
section_ffe1="\xff\xe1"
section_fffe="\xff\xfe"

@ -0,0 +1,107 @@
#
# AFL dictionary for JavaScript
# -----------------------------
#
# Contains basic reserved keywords and syntax building blocks.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
keyword_arguments="arguments"
keyword_break="break"
keyword_case="case"
keyword_catch="catch"
keyword_const="const"
keyword_continue="continue"
keyword_debugger="debugger"
keyword_decodeURI="decodeURI"
keyword_default="default"
keyword_delete="delete"
keyword_do="do"
keyword_else="else"
keyword_escape="escape"
keyword_eval="eval"
keyword_export="export"
keyword_finally="finally"
keyword_for="for (a=0;a<2;a++)"
keyword_function="function"
keyword_if="if"
keyword_in="in"
keyword_instanceof="instanceof"
keyword_isNaN="isNaN"
keyword_let="let"
keyword_new="new"
keyword_parseInt="parseInt"
keyword_return="return"
keyword_switch="switch"
keyword_this="this"
keyword_throw="throw"
keyword_try="try"
keyword_typeof="typeof"
keyword_var="var"
keyword_void="void"
keyword_while="while"
keyword_with="with"
misc_1=" 1"
misc_a="a"
misc_array=" [1]"
misc_assign=" a=1"
misc_code_block=" {1}"
misc_colon_num=" 1:"
misc_colon_string=" 'a':"
misc_comma=" ,"
misc_comment_block=" /* */"
misc_comment_line=" //"
misc_cond=" 1?2:3"
misc_dec=" --"
misc_div=" /"
misc_equals=" ="
misc_fn=" a()"
misc_identical=" ==="
misc_inc=" ++"
misc_minus=" -"
misc_modulo=" %"
misc_parentheses=" ()"
misc_parentheses_1=" (1)"
misc_parentheses_1x4=" (1,1,1,1)"
misc_parentheses_a=" (a)"
misc_period="."
misc_plus=" +"
misc_plus_assign=" +="
misc_regex=" /a/g"
misc_rol=" <<<"
misc_semicolon=" ;"
misc_serialized_object=" {'a': 1}"
misc_string=" 'a'"
misc_unicode=" '\\u0001'"
object_Array=" Array"
object_Boolean=" Boolean"
object_Date=" Date"
object_Function=" Function"
object_Infinity=" Infinity"
object_Int8Array=" Int8Array"
object_Math=" Math"
object_NaN=" NaN"
object_Number=" Number"
object_Object=" Object"
object_RegExp=" RegExp"
object_String=" String"
object_Symbol=" Symbol"
object_false=" false"
object_null=" null"
object_true=" true"
prop_charAt=".charAt"
prop_concat=".concat"
prop_constructor=".constructor"
prop_destructor=".destructor"
prop_length=".length"
prop_match=".match"
prop_proto=".__proto__"
prop_prototype=".prototype"
prop_slice=".slice"
prop_toCode=".toCode"
prop_toString=".toString"
prop_valueOf=".valueOf"

@ -0,0 +1,52 @@
#
# AFL dictionary for JSON
# -----------------------
#
# Just the very basics.
#
# Inspired by a dictionary by Jakub Wilk <jwilk@jwilk.net>
#
"0"
",0"
":0"
"0:"
"-1.2e+3"
"true"
"false"
"null"
"\"\""
",\"\""
":\"\""
"\"\":"
"{}"
",{}"
":{}"
"{\"\":0}"
"{{}}"
"[]"
",[]"
":[]"
"[0]"
"[[]]"
"''"
"\\"
"\\b"
"\\f"
"\\n"
"\\r"
"\\t"
"\\u0000"
"\\x00"
"\\0"
"\\uD800\\uDC00"
"\\uDBFF\\uDFFF"
"\"\":0"
"//"
"/**/"

File diff suppressed because it is too large Load Diff

@ -0,0 +1,16 @@
#
# AFL dictionary for fuzzing Perl
# --------------------------------
#
# Created by @RandomDhiraj
#
"<:crlf"
"fwrite()"
"fread()"
":raw:utf8"
":raw:eol(LF)"
"Perl_invert()"
":raw:eol(CRLF)"
"Perl_PerlIO_eof()"

@ -0,0 +1,38 @@
#
# AFL dictionary for PNG images
# -----------------------------
#
# Just the basic, standard-originating sections; does not include vendor
# extensions.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_png="\x89PNG\x0d\x0a\x1a\x0a"
section_IDAT="IDAT"
section_IEND="IEND"
section_IHDR="IHDR"
section_PLTE="PLTE"
section_bKGD="bKGD"
section_cHRM="cHRM"
section_fRAc="fRAc"
section_gAMA="gAMA"
section_gIFg="gIFg"
section_gIFt="gIFt"
section_gIFx="gIFx"
section_hIST="hIST"
section_iCCP="iCCP"
section_iTXt="iTXt"
section_oFFs="oFFs"
section_pCAL="pCAL"
section_pHYs="pHYs"
section_sBIT="sBIT"
section_sCAL="sCAL"
section_sPLT="sPLT"
section_sRGB="sRGB"
section_sTER="sTER"
section_tEXt="tEXt"
section_tIME="tIME"
section_tRNS="tRNS"
section_zTXt="zTXt"

@ -0,0 +1,254 @@
#
# AFL dictionary for JS regex
# ---------------------------
#
# Contains various regular expressions.
#
# Created by Yang Guo <yangguo@chromium.org>
#
"?"
"abc"
"()"
"[]"
"abc|def"
"abc|def|ghi"
"^xxx$"
"ab\\b\\d\\bcd"
"\\w|\\d"
"a*?"
"abc+"
"abc+?"
"xyz?"
"xyz??"
"xyz{0,1}"
"xyz{0,1}?"
"xyz{93}"
"xyz{1,32}"
"xyz{1,32}?"
"xyz{1,}"
"xyz{1,}?"
"a\\fb\\nc\\rd\\te\\vf"
"a\\nb\\bc"
"(?:foo)"
"(?: foo )"
"foo|(bar|baz)|quux"
"foo(?=bar)baz"
"foo(?!bar)baz"
"foo(?<=bar)baz"
"foo(?<!bar)baz"
"()"
"(?=)"
"[]"
"[x]"
"[xyz]"
"[a-zA-Z0-9]"
"[-123]"
"[^123]"
"]"
"}"
"[a-b-c]"
"[x\\dz]"
"[\\d-z]"
"[\\d-\\d]"
"[z-\\d]"
"\\cj\\cJ\\ci\\cI\\ck\\cK"
"\\c!"
"\\c_"
"\\c~"
"[\\c!]"
"[\\c_]"
"[\\c~]"
"[\\ca]"
"[\\cz]"
"[\\cA]"
"[\\cZ]"
"[\\c1]"
"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
"\\8"
"\\9"
"\\11"
"\\11a"
"\\011"
"\\118"
"\\111"
"\\1111"
"(x)(x)(x)\\1"
"(x)(x)(x)\\2"
"(x)(x)(x)\\3"
"(x)(x)(x)\\4"
"(x)(x)(x)\\1*"
"(x)(x)(x)\\3*"
"(x)(x)(x)\\4*"
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
"(a)\\1"
"(a\\1)"
"(\\1a)"
"(\\2)(\\1)"
"(?=a){0,10}a"
"(?=a){1,10}a"
"(?=a){9,10}a"
"(?!a)?a"
"\\1(a)"
"(?!(a))\\1"
"(?!\\1(a\\1)\\1)\\1"
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
"[\\0]"
"[\\11]"
"[\\11a]"
"[\\011]"
"[\\00011]"
"[\\118]"
"[\\111]"
"[\\1111]"
"\\x60"
"\\x3z"
"\\c"
"\\u0034"
"\\u003z"
"foo[z]*"
"\\u{12345}"
"\\u{12345}\\u{23456}"
"\\u{12345}{3}"
"\\u{12345}*"
"\\ud808\\udf45*"
"[\\ud808\\udf45-\\ud809\\udccc]"
"a"
"a|b"
"a\\n"
"a$"
"a\\b!"
"a\\Bb"
"a*?"
"a?"
"a??"
"a{0,1}?"
"a{1,2}?"
"a+?"
"(a)"
"(a)\\1"
"(\\1a)"
"\\1(a)"
"a\\s"
"a\\S"
"a\\D"
"a\\w"
"a\\W"
"a."
"a\\q"
"a[a]"
"a[^a]"
"a[a-z]"
"a(?:b)"
"a(?=b)"
"a(?!b)"
"\\x60"
"\\u0060"
"\\cA"
"\\q"
"\\1112"
"(a)\\1"
"(?!a)?a\\1"
"(?:(?=a))a\\1"
"a{}"
"a{,}"
"a{"
"a{z}"
"a{12z}"
"a{12,"
"a{12,3b"
"{}"
"{,}"
"{"
"{z}"
"{1z}"
"{12,"
"{12,3b"
"a"
"abc"
"a[bc]d"
"a|bc"
"ab|c"
"a||bc"
"(?:ab)"
"(?:ab|cde)"
"(?:ab)|cde"
"(ab)"
"(ab|cde)"
"(ab)\\1"
"(ab|cde)\\1"
"(?:ab)?"
"(?:ab)+"
"a?"
"a+"
"a??"
"a*?"
"a+?"
"(?:a?)?"
"(?:a+)?"
"(?:a?)+"
"(?:a*)+"
"(?:a+)+"
"(?:a?)*"
"(?:a*)*"
"(?:a+)*"
"a{0}"
"(?:a+){0,0}"
"a*b"
"a+b"
"a*b|c"
"a+b|c"
"(?:a{5,1000000}){3,1000000}"
"(?:ab){4,7}"
"a\\bc"
"a\\sc"
"a\\Sc"
"a(?=b)c"
"a(?=bbb|bb)c"
"a(?!bbb|bb)c"
"\xe2\x81\xa3"
"[\xe2\x81\xa3]"
"\xed\xb0\x80"
"\xed\xa0\x80"
"(\xed\xb0\x80)\x01"
"((\xed\xa0\x80))\x02"
"\xf0\x9f\x92\xa9"
"\x01"
"\x0f"
"[-\xf0\x9f\x92\xa9]+"
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
"(?<=)"
"(?<=a)"
"(?<!)"
"(?<!a)"
"(?<a>)"
"(?<a>.)"
"(?<a>.)\\k<a>"
"\\p{Script=Greek}"
"\\P{sc=Greek}"
"\\p{Script_Extensions=Greek}"
"\\P{scx=Greek}"
"\\p{General_Category=Decimal_Number}"
"\\P{gc=Decimal_Number}"
"\\p{gc=Nd}"
"\\P{Decimal_Number}"
"\\p{Nd}"
"\\P{Any}"
"\\p{Changes_When_NFKC_Casefolded}"
"(?:a?)??"
"a?)"xyz{93}"
"{93}"
"a{12za?)?"
"[\x8f]"
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\x92\xa9-\xf4\x8f\xbf\xbf]"
"[\x92\xa9-\xf4\x8f\xbf\xbf]"
"\\1\\2(b\\1\\2))\\2)\\1"
"\\1\\2(a(?:\\1\\2))\\2)\\1"
"?:\\1"
"\\1(b\\1\\2))\\2)\\1"
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
"foo(?=bar)bar)baz"
"fo(?o(?o(?o(?=bar)baz"
"foo(?=bar)baz"
"foo(?=bar)bar)az"

@ -0,0 +1,282 @@
#
# AFL dictionary for SQL
# ----------------------
#
# Modeled on the SQLite documentation; contains a number of SQLite
# extensions. Other dialects of SQL may benefit from customized dictionaries.
#
# If you append @1 to the file name when loading this dictionary, afl-fuzz
# will also additionally load a selection of pragma keywords that are very
# specific to SQLite (and are probably less interesting from the security
# standpoint, because they are usually not allowed in non-privileged
# contexts).
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
function_abs=" abs(1)"
function_avg=" avg(1)"
function_changes=" changes()"
function_char=" char(1)"
function_coalesce=" coalesce(1,1)"
function_count=" count(1)"
function_date=" date(1,1,1)"
function_datetime=" datetime(1,1,1)"
function_decimal=" decimal(1,1)"
function_glob=" glob(1,1)"
function_group_concat=" group_concat(1,1)"
function_hex=" hex(1)"
function_ifnull=" ifnull(1,1)"
function_instr=" instr(1,1)"
function_julianday=" julianday(1,1,1)"
function_last_insert_rowid=" last_insert_rowid()"
function_length=" length(1)"
function_like=" like(1,1)"
function_likelihood=" likelihood(1,1)"
function_likely=" likely(1)"
function_load_extension=" load_extension(1,1)"
function_lower=" lower(1)"
function_ltrim=" ltrim(1,1)"
function_max=" max(1,1)"
function_min=" min(1,1)"
function_nullif=" nullif(1,1)"
function_printf=" printf(1,1)"
function_quote=" quote(1)"
function_random=" random()"
function_randomblob=" randomblob(1)"
function_replace=" replace(1,1,1)"
function_round=" round(1,1)"
function_rtrim=" rtrim(1,1)"
function_soundex=" soundex(1)"
function_sqlite_compileoption_get=" sqlite_compileoption_get(1)"
function_sqlite_compileoption_used=" sqlite_compileoption_used(1)"
function_sqlite_source_id=" sqlite_source_id()"
function_sqlite_version=" sqlite_version()"
function_strftime=" strftime(1,1,1,1)"
function_substr=" substr(1,1,1)"
function_sum=" sum(1)"
function_time=" time(1,1,1)"
function_total=" total(1)"
function_total_changes=" total_changes()"
function_trim=" trim(1,1)"
function_typeof=" typeof(1)"
function_unicode=" unicode(1)"
function_unlikely=" unlikely(1)"
function_upper=" upper(1)"
function_varchar=" varchar(1)"
function_zeroblob=" zeroblob(1)"
keyword_ABORT="ABORT"
keyword_ACTION="ACTION"
keyword_ADD="ADD"
keyword_AFTER="AFTER"
keyword_ALL="ALL"
keyword_ALTER="ALTER"
keyword_ANALYZE="ANALYZE"
keyword_AND="AND"
keyword_AS="AS"
keyword_ASC="ASC"
keyword_ATTACH="ATTACH"
keyword_AUTOINCREMENT="AUTOINCREMENT"
keyword_BEFORE="BEFORE"
keyword_BEGIN="BEGIN"
keyword_BETWEEN="BETWEEN"
keyword_BY="BY"
keyword_CASCADE="CASCADE"
keyword_CASE="CASE"
keyword_CAST="CAST"
keyword_CHECK="CHECK"
keyword_COLLATE="COLLATE"
keyword_COLUMN="COLUMN"
keyword_COMMIT="COMMIT"
keyword_CONFLICT="CONFLICT"
keyword_CONSTRAINT="CONSTRAINT"
keyword_CREATE="CREATE"
keyword_CROSS="CROSS"
keyword_CURRENT_DATE="CURRENT_DATE"
keyword_CURRENT_TIME="CURRENT_TIME"
keyword_CURRENT_TIMESTAMP="CURRENT_TIMESTAMP"
keyword_DATABASE="DATABASE"
keyword_DEFAULT="DEFAULT"
keyword_DEFERRABLE="DEFERRABLE"
keyword_DEFERRED="DEFERRED"
keyword_DELETE="DELETE"
keyword_DESC="DESC"
keyword_DETACH="DETACH"
keyword_DISTINCT="DISTINCT"
keyword_DROP="DROP"
keyword_EACH="EACH"
keyword_ELSE="ELSE"
keyword_END="END"
keyword_ESCAPE="ESCAPE"
keyword_EXCEPT="EXCEPT"
keyword_EXCLUSIVE="EXCLUSIVE"
keyword_EXISTS="EXISTS"
keyword_EXPLAIN="EXPLAIN"
keyword_FAIL="FAIL"
keyword_FOR="FOR"
keyword_FOREIGN="FOREIGN"
keyword_FROM="FROM"
keyword_FULL="FULL"
keyword_GLOB="GLOB"
keyword_GROUP="GROUP"
keyword_HAVING="HAVING"
keyword_IF="IF"
keyword_IGNORE="IGNORE"
keyword_IMMEDIATE="IMMEDIATE"
keyword_IN="IN"
keyword_INDEX="INDEX"
keyword_INDEXED="INDEXED"
keyword_INITIALLY="INITIALLY"
keyword_INNER="INNER"
keyword_INSERT="INSERT"
keyword_INSTEAD="INSTEAD"
keyword_INTERSECT="INTERSECT"
keyword_INTO="INTO"
keyword_IS="IS"
keyword_ISNULL="ISNULL"
keyword_JOIN="JOIN"
keyword_KEY="KEY"
keyword_LEFT="LEFT"
keyword_LIKE="LIKE"
keyword_LIMIT="LIMIT"
keyword_MATCH="MATCH"
keyword_NATURAL="NATURAL"
keyword_NO="NO"
keyword_NOT="NOT"
keyword_NOTNULL="NOTNULL"
keyword_NULL="NULL"
keyword_OF="OF"
keyword_OFFSET="OFFSET"
keyword_ON="ON"
keyword_OR="OR"
keyword_ORDER="ORDER"
keyword_OUTER="OUTER"
keyword_PLAN="PLAN"
keyword_PRAGMA="PRAGMA"
keyword_PRIMARY="PRIMARY"
keyword_QUERY="QUERY"
keyword_RAISE="RAISE"
keyword_RECURSIVE="RECURSIVE"
keyword_REFERENCES="REFERENCES"
keyword_REGEXP="REGEXP"
keyword_REINDEX="REINDEX"
keyword_RELEASE="RELEASE"
keyword_RENAME="RENAME"
keyword_REPLACE="REPLACE"
keyword_RESTRICT="RESTRICT"
keyword_RIGHT="RIGHT"
keyword_ROLLBACK="ROLLBACK"
keyword_ROW="ROW"
keyword_SAVEPOINT="SAVEPOINT"
keyword_SELECT="SELECT"
keyword_SET="SET"
keyword_TABLE="TABLE"
keyword_TEMP="TEMP"
keyword_TEMPORARY="TEMPORARY"
keyword_THEN="THEN"
keyword_TO="TO"
keyword_TRANSACTION="TRANSACTION"
keyword_TRIGGER="TRIGGER"
keyword_UNION="UNION"
keyword_UNIQUE="UNIQUE"
keyword_UPDATE="UPDATE"
keyword_USING="USING"
keyword_VACUUM="VACUUM"
keyword_VALUES="VALUES"
keyword_VIEW="VIEW"
keyword_VIRTUAL="VIRTUAL"
keyword_WHEN="WHEN"
keyword_WHERE="WHERE"
keyword_WITH="WITH"
keyword_WITHOUT="WITHOUT"
operator_concat=" || "
operator_above_eq=" >="
snippet_1eq1=" 1=1"
snippet_at=" @1"
snippet_backticks=" `a`"
snippet_blob=" blob"
snippet_brackets=" [a]"
snippet_colon=" :1"
snippet_comment=" /* */"
snippet_date="2001-01-01"
snippet_dollar=" $1"
snippet_dotref=" a.b"
snippet_fmtY="%Y"
snippet_int=" int"
snippet_neg1=" -1"
snippet_pair=" a,b"
snippet_parentheses=" (1)"
snippet_plus2days="+2 days"
snippet_qmark=" ?1"
snippet_semicolon=" ;"
snippet_star=" *"
snippet_string_pair=" \"a\",\"b\""
string_dbl_q=" \"a\""
string_escaped_q=" 'a''b'"
string_single_q=" 'a'"
pragma_application_id@1=" application_id"
pragma_auto_vacuum@1=" auto_vacuum"
pragma_automatic_index@1=" automatic_index"
pragma_busy_timeout@1=" busy_timeout"
pragma_cache_size@1=" cache_size"
pragma_cache_spill@1=" cache_spill"
pragma_case_sensitive_like@1=" case_sensitive_like"
pragma_checkpoint_fullfsync@1=" checkpoint_fullfsync"
pragma_collation_list@1=" collation_list"
pragma_compile_options@1=" compile_options"
pragma_count_changes@1=" count_changes"
pragma_data_store_directory@1=" data_store_directory"
pragma_database_list@1=" database_list"
pragma_default_cache_size@1=" default_cache_size"
pragma_defer_foreign_keys@1=" defer_foreign_keys"
pragma_empty_result_callbacks@1=" empty_result_callbacks"
pragma_encoding@1=" encoding"
pragma_foreign_key_check@1=" foreign_key_check"
pragma_foreign_key_list@1=" foreign_key_list"
pragma_foreign_keys@1=" foreign_keys"
pragma_freelist_count@1=" freelist_count"
pragma_full_column_names@1=" full_column_names"
pragma_fullfsync@1=" fullfsync"
pragma_ignore_check_constraints@1=" ignore_check_constraints"
pragma_incremental_vacuum@1=" incremental_vacuum"
pragma_index_info@1=" index_info"
pragma_index_list@1=" index_list"
pragma_integrity_check@1=" integrity_check"
pragma_journal_mode@1=" journal_mode"
pragma_journal_size_limit@1=" journal_size_limit"
pragma_legacy_file_format@1=" legacy_file_format"
pragma_locking_mode@1=" locking_mode"
pragma_max_page_count@1=" max_page_count"
pragma_mmap_size@1=" mmap_size"
pragma_page_count@1=" page_count"
pragma_page_size@1=" page_size"
pragma_parser_trace@1=" parser_trace"
pragma_query_only@1=" query_only"
pragma_quick_check@1=" quick_check"
pragma_read_uncommitted@1=" read_uncommitted"
pragma_recursive_triggers@1=" recursive_triggers"
pragma_reverse_unordered_selects@1=" reverse_unordered_selects"
pragma_schema_version@1=" schema_version"
pragma_secure_delete@1=" secure_delete"
pragma_short_column_names@1=" short_column_names"
pragma_shrink_memory@1=" shrink_memory"
pragma_soft_heap_limit@1=" soft_heap_limit"
pragma_stats@1=" stats"
pragma_synchronous@1=" synchronous"
pragma_table_info@1=" table_info"
pragma_temp_store@1=" temp_store"
pragma_temp_store_directory@1=" temp_store_directory"
pragma_threads@1=" threads"
pragma_user_version@1=" user_version"
pragma_vdbe_addoptrace@1=" vdbe_addoptrace"
pragma_vdbe_debug@1=" vdbe_debug"
pragma_vdbe_listing@1=" vdbe_listing"
pragma_vdbe_trace@1=" vdbe_trace"
pragma_wal_autocheckpoint@1=" wal_autocheckpoint"
pragma_wal_checkpoint@1=" wal_checkpoint"
pragma_writable_schema@1=" writable_schema"

@ -0,0 +1,51 @@
#
# AFL dictionary for TIFF images
# ------------------------------
#
# Just the basic, standard-originating sections; does not include vendor
# extensions.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_ii="II*\x00"
header_mm="MM\x00*"
section_100="\x00\x01"
section_101="\x01\x01"
section_102="\x02\x01"
section_103="\x03\x01"
section_106="\x06\x01"
section_107="\x07\x01"
section_10D="\x0d\x01"
section_10E="\x0e\x01"
section_10F="\x0f\x01"
section_110="\x10\x01"
section_111="\x11\x01"
section_112="\x12\x01"
section_115="\x15\x01"
section_116="\x16\x01"
section_117="\x17\x01"
section_11A="\x1a\x01"
section_11B="\x1b\x01"
section_11C="\x1c\x01"
section_11D="\x1d\x01"
section_11E="\x1e\x01"
section_11F="\x1f\x01"
section_122="\"\x01"
section_123="#\x01"
section_124="$\x01"
section_125="%\x01"
section_128="(\x01"
section_129=")\x01"
section_12D="-\x01"
section_131="1\x01"
section_132="2\x01"
section_13B=";\x01"
section_13C="<\x01"
section_13D="=\x01"
section_13E=">\x01"
section_13F="?\x01"
section_140="@\x01"
section_FE="\xfe\x00"
section_FF="\xff\x00"

@ -0,0 +1,20 @@
#
# AFL dictionary for WebP images
# ------------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_RIFF="RIFF"
header_WEBP="WEBP"
section_ALPH="ALPH"
section_ANIM="ANIM"
section_ANMF="ANMF"
section_EXIF="EXIF"
section_FRGM="FRGM"
section_ICCP="ICCP"
section_VP8="VP8 "
section_VP8L="VP8L"
section_VP8X="VP8X"
section_XMP="XMP "

@ -0,0 +1,72 @@
#
# AFL dictionary for XML
# ----------------------
#
# Several basic syntax elements and attributes, modeled on libxml2.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
attr_encoding=" encoding=\"1\""
attr_generic=" a=\"1\""
attr_href=" href=\"1\""
attr_standalone=" standalone=\"no\""
attr_version=" version=\"1\""
attr_xml_base=" xml:base=\"1\""
attr_xml_id=" xml:id=\"1\""
attr_xml_lang=" xml:lang=\"1\""
attr_xml_space=" xml:space=\"1\""
attr_xmlns=" xmlns=\"1\""
entity_builtin="&lt;"
entity_decimal="&#1;"
entity_external="&a;"
entity_hex="&#x1;"
string_any="ANY"
string_brackets="[]"
string_cdata="CDATA"
string_col_fallback=":fallback"
string_col_generic=":a"
string_col_include=":include"
string_dashes="--"
string_empty="EMPTY"
string_empty_dblquotes="\"\""
string_empty_quotes="''"
string_entities="ENTITIES"
string_entity="ENTITY"
string_fixed="#FIXED"
string_id="ID"
string_idref="IDREF"
string_idrefs="IDREFS"
string_implied="#IMPLIED"
string_nmtoken="NMTOKEN"
string_nmtokens="NMTOKENS"
string_notation="NOTATION"
string_parentheses="()"
string_pcdata="#PCDATA"
string_percent="%a"
string_public="PUBLIC"
string_required="#REQUIRED"
string_schema=":schema"
string_system="SYSTEM"
string_ucs4="UCS-4"
string_utf16="UTF-16"
string_utf8="UTF-8"
string_xmlns="xmlns:"
tag_attlist="<!ATTLIST"
tag_cdata="<![CDATA["
tag_close="</a>"
tag_doctype="<!DOCTYPE"
tag_element="<!ELEMENT"
tag_entity="<!ENTITY"
tag_ignore="<![IGNORE["
tag_include="<![INCLUDE["
tag_notation="<!NOTATION"
tag_open="<a>"
tag_open_close="<a />"
tag_open_exclamation="<!"
tag_open_q="<?"
tag_sq2_close="]]>"
tag_xml_q="<?xml?>"

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,183 @@
=========================
Installation instructions
=========================
This document provides basic installation instructions and discusses known
issues for a variety of platforms. See README for the general instruction
manual.
1) Linux on x86
---------------
This platform is expected to work well. Compile the program with:
$ make
You can start using the fuzzer without installation, but it is also possible to
install it with:
# make install
There are no special dependencies to speak of; you will need GNU make and a
working compiler (gcc or clang). Some of the optional scripts bundled with the
program may depend on bash, gdb, and similar basic tools.
If you are using clang, please review llvm_mode/README.llvm; the LLVM
integration mode can offer substantial performance gains compared to the
traditional approach.
You may have to change several settings to get optimal results (most notably,
disable crash reporting utilities and switch to a different CPU governor), but
afl-fuzz will guide you through that if necessary.
2) OpenBSD, FreeBSD, NetBSD on x86
----------------------------------
Similarly to Linux, these platforms are expected to work well and are
regularly tested. Compile everything with GNU make:
$ gmake
Note that BSD make will *not* work; if you do not have gmake on your system,
please install it first. As on Linux, you can use the fuzzer itself without
installation, or install it with:
# gmake install
Keep in mind that if you are using csh as your shell, the syntax of some of the
shell commands given in the README and other docs will be different.
The llvm_mode requires a dynamically linked, fully-operational installation of
clang. At least on FreeBSD, the clang binaries are static and do not include
some of the essential tools, so if you want to make it work, you may need to
follow the instructions in llvm_mode/README.llvm.
Beyond that, everything should work as advertised.
The QEMU mode is currently supported only on Linux. I think it's just a QEMU
problem; I couldn't get a vanilla copy of user-mode emulation support working
correctly on BSD at all.
3) MacOS X on x86
-----------------
MacOS X should work, but there are some gotchas due to the idiosyncrasies of
the platform. On top of this, I have limited release testing capabilities
and depend mostly on user feedback.
To build AFL, install Xcode and follow the general instructions for Linux.
The Xcode 'gcc' tool is just a wrapper for clang, so be sure to use afl-clang
to compile any instrumented binaries; afl-gcc will fail unless you have GCC
installed from another source (in which case, please specify AFL_CC and
AFL_CXX to point to the "real" GCC binaries).
Only 64-bit compilation will work on the platform; porting the 32-bit
instrumentation would require a fair amount of work due to the way OS X
handles relocations, and today, virtually all MacOS X boxes are 64-bit.
The crash reporting daemon that comes by default with MacOS X will cause
problems with fuzzing. You need to turn it off by following the instructions
provided here: http://goo.gl/CCcd5u
The fork() semantics on OS X are a bit unusual compared to other unix systems
and definitely don't look POSIX-compliant. This means two things:
- Fuzzing will probably be slower than on Linux. In fact, some folks report
considerable performance gains by running the jobs inside a Linux VM on
MacOS X.
- Some non-portable, platform-specific code may be incompatible with the
AFL forkserver. If you run into any problems, set AFL_NO_FORKSRV=1 in the
environment before starting afl-fuzz.
User emulation mode of QEMU does not appear to be supported on MacOS X, so
black-box instrumentation mode (-Q) will not work.
The llvm_mode requires a fully-operational installation of clang. The one that
comes with Xcode is missing some of the essential headers and helper tools.
See llvm_mode/README.llvm for advice on how to build the compiler from scratch.
4) Linux or *BSD on non-x86 systems
-----------------------------------
Standard build will fail on non-x86 systems, but you should be able to
leverage two other options:
- The LLVM mode (see llvm_mode/README.llvm), which does not rely on
x86-specific assembly shims. It's fast and robust, but requires a
complete installation of clang.
- The QEMU mode (see qemu_mode/README.qemu), which can also be used for
fuzzing cross-platform binaries. It's slower and more fragile, but
can be used even when you don't have the source for the tested app.
If you're not sure what you need, you need the LLVM mode. To get it, try:
$ AFL_NO_X86=1 gmake && gmake -C llvm_mode
...and compile your target program with afl-clang-fast or afl-clang-fast++
instead of the traditional afl-gcc or afl-clang wrappers.
5) Solaris on x86
-----------------
The fuzzer reportedly works on Solaris, but I have not tested this first-hand,
and the user base is fairly small, so I don't have a lot of feedback.
To get the ball rolling, you will need to use GNU make and GCC or clang. I'm
being told that the stock version of GCC that comes with the platform does not
work properly due to its reliance on a hardcoded location for 'as' (completely
ignoring the -B parameter or $PATH).
To fix this, you may want to build stock GCC from the source, like so:
$ ./configure --prefix=$HOME/gcc --with-gnu-as --with-gnu-ld \
--with-gmp-include=/usr/include/gmp --with-mpfr-include=/usr/include/mpfr
$ make
$ sudo make install
Do *not* specify --with-as=/usr/gnu/bin/as - this will produce a GCC binary that
ignores the -B flag and you will be back to square one.
Note that Solaris reportedly comes with crash reporting enabled, which causes
problems with crashes being misinterpreted as hangs, similarly to the gotchas
for Linux and MacOS X. AFL does not auto-detect crash reporting on this
particular platform, so you may need to run the following command:
$ coreadm -d global -d global-setid -d process -d proc-setid \
-d kzone -d log
User emulation mode of QEMU is not available on Solaris, so black-box
instrumentation mode (-Q) will not work.
6) Everything else
------------------
You're on your own. On POSIX-compliant systems, you may be able to compile and
run the fuzzer; and the LLVM mode may offer a way to instrument non-x86 code.
The fuzzer will not run on Windows. It will also not work under Cygwin. It
could be ported to the latter platform fairly easily, but it's a pretty bad
idea, because Cygwin is extremely slow. It makes much more sense to use
VirtualBox or so to run a hardware-accelerated Linux VM; it will run around
20x faster or so. If you have a *really* compelling use case for Cygwin, let
me know.
Although Android on x86 should theoretically work, the stock kernel may have
SHM support compiled out, and if so, you may have to address that issue first.
It's possible that all you need is this workaround:
https://github.com/pelya/android-shmem
Joshua J. Drake notes that the Android linker adds a shim that automatically
intercepts SIGSEGV and related signals. To fix this issue and be able to see
crashes, you need to put this at the beginning of the fuzzed program:
signal(SIGILL, SIG_DFL);
signal(SIGABRT, SIG_DFL);
signal(SIGBUS, SIG_DFL);
signal(SIGFPE, SIG_DFL);
signal(SIGSEGV, SIG_DFL);
You may need to #include <signal.h> first.

@ -0,0 +1,49 @@
=====================
AFL quick start guide
=====================
You should read docs/README. It's pretty short. If you really can't, here's
how to hit the ground running:
1) Compile AFL with 'make'. If build fails, see docs/INSTALL for tips.
2) Find or write a reasonably fast and simple program that takes data from
a file or stdin, processes it in a test-worthy way, then exits cleanly.
If testing a network service, modify it to run in the foreground and read
from stdin. When fuzzing a format that uses checksums, comment out the
checksum verification code, too.
The program must crash properly when a fault is encountered. Watch out for
custom SIGSEGV or SIGABRT handlers and background processes. For tips on
detecting non-crashing flaws, see section 11 in docs/README.
3) Compile the program / library to be fuzzed using afl-gcc. A common way to
do this would be:
CC=/path/to/afl-gcc CXX=/path/to/afl-g++ ./configure --disable-shared
make clean all
If program build fails, ping <afl-users@googlegroups.com>.
4) Get a small but valid input file that makes sense to the program. When
fuzzing verbose syntax (SQL, HTTP, etc), create a dictionary as described in
dictionaries/README.dictionaries, too.
5) If the program reads from stdin, run 'afl-fuzz' like so:
./afl-fuzz -i testcase_dir -o findings_dir -- \
/path/to/tested/program [...program's cmdline...]
If the program takes input from a file, you can put @@ in the program's
command line; AFL will put an auto-generated file name in there for you.
6) Investigate anything shown in red in the fuzzer UI by promptly consulting
docs/status_screen.txt.
That's it. Sit back, relax, and - time permitting - try to skim through the
following files:
- docs/README - A general introduction to AFL,
- docs/perf_tips.txt - Simple tips on how to fuzz more quickly,
- docs/status_screen.txt - An explanation of the tidbits shown in the UI,
- docs/parallel_fuzzing.txt - Advice on running AFL on multiple cores.

@ -0,0 +1,281 @@
=======================
Environmental variables
=======================
This document discusses the environment variables used by American Fuzzy Lop
to expose various exotic functions that may be (rarely) useful for power
users or for some types of custom fuzzing setups. See README for the general
instruction manual.
1) Settings for afl-gcc, afl-clang, and afl-as
----------------------------------------------
Because they can't directly accept command-line options, the compile-time
tools make fairly broad use of environmental variables:
- Setting AFL_HARDEN automatically adds code hardening options when invoking
the downstream compiler. This currently includes -D_FORTIFY_SOURCE=2 and
-fstack-protector-all. The setting is useful for catching non-crashing
memory bugs at the expense of a very slight (sub-5%) performance loss.
- By default, the wrapper appends -O3 to optimize builds. Very rarely, this
will cause problems in programs built with -Werror, simply because -O3
enables more thorough code analysis and can spew out additional warnings.
To disable optimizations, set AFL_DONT_OPTIMIZE.
- Setting AFL_USE_ASAN automatically enables ASAN, provided that your
compiler supports that. Note that fuzzing with ASAN is mildly challenging
- see notes_for_asan.txt.
(You can also enable MSAN via AFL_USE_MSAN; ASAN and MSAN come with the
same gotchas; the modes are mutually exclusive. UBSAN and other exotic
sanitizers are not officially supported yet, but are easy to get to work
by hand.)
- Setting AFL_CC, AFL_CXX, and AFL_AS lets you use alternate downstream
compilation tools, rather than the default 'clang', 'gcc', or 'as' binaries
in your $PATH.
- AFL_PATH can be used to point afl-gcc to an alternate location of afl-as.
One possible use of this is experimental/clang_asm_normalize/, which lets
you instrument hand-written assembly when compiling clang code by plugging
a normalizer into the chain. (There is no equivalent feature for GCC.)
- Setting AFL_INST_RATIO to a percentage between 0 and 100% controls the
probability of instrumenting every branch. This is (very rarely) useful
when dealing with exceptionally complex programs that saturate the output
bitmap. Examples include v8, ffmpeg, and perl.
(If this ever happens, afl-fuzz will warn you ahead of time by
displaying the "bitmap density" field in fiery red.)
Setting AFL_INST_RATIO to 0 is a valid choice. This will instrument only
the transitions between function entry points, but not individual branches.
- AFL_NO_BUILTIN causes the compiler to generate code suitable for use with
libtokencap.so (but perhaps running a bit slower than without the flag).
- TMPDIR is used by afl-as for temporary files; if this variable is not set,
the tool defaults to /tmp.
- Setting AFL_KEEP_ASSEMBLY prevents afl-as from deleting instrumented
assembly files. Useful for troubleshooting problems or understanding how
the tool works. To get them in a predictable place, try something like:
mkdir assembly_here
TMPDIR=$PWD/assembly_here AFL_KEEP_ASSEMBLY=1 make clean all
- Setting AFL_QUIET will prevent afl-cc and afl-as banners from being
displayed during compilation, in case you find them distracting.
2) Settings for afl-clang-fast
------------------------------
The native LLVM instrumentation helper accepts a subset of the settings
discussed in section #1, with the exception of:
- AFL_AS, since this toolchain does not directly invoke GNU as.
- TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are
created.
Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc,
because functions are *not* instrumented unconditionally - so low values
will have a more striking effect. For this tool, 0 is not a valid choice.
3) Settings for afl-fuzz
------------------------
The main fuzzer binary accepts several options that disable a couple of sanity
checks or alter some of the more exotic semantics of the tool:
- Setting AFL_SKIP_CPUFREQ skips the check for CPU scaling policy. This is
useful if you can't change the defaults (e.g., no root access to the
system) and are OK with some performance loss.
- Setting AFL_NO_FORKSRV disables the forkserver optimization, reverting to
fork + execve() call for every tested input. This is useful mostly when
working with unruly libraries that create threads or do other crazy
things when initializing (before the instrumentation has a chance to run).
Note that this setting inhibits some of the user-friendly diagnostics
normally done when starting up the forkserver and causes a pretty
significant performance drop.
- AFL_EXIT_WHEN_DONE causes afl-fuzz to terminate when all existing paths
have been fuzzed and there were no new finds for a while. This would be
normally indicated by the cycle counter in the UI turning green. May be
convenient for some types of automated jobs.
- Setting AFL_NO_AFFINITY disables attempts to bind to a specific CPU core
on Linux systems. This slows things down, but lets you run more instances
of afl-fuzz than would be prudent (if you really want to).
- AFL_SKIP_CRASHES causes AFL to tolerate crashing files in the input
queue. This can help with rare situations where a program crashes only
intermittently, but it's not really recommended under normal operating
conditions.
- Setting AFL_HANG_TMOUT allows you to specify a different timeout for
deciding if a particular test case is a "hang". The default is 1 second
or the value of the -t parameter, whichever is larger. Dialing the value
down can be useful if you are very concerned about slow inputs, or if you
don't want AFL to spend too much time classifying that stuff and just
rapidly put all timeouts in that bin.
- AFL_NO_ARITH causes AFL to skip most of the deterministic arithmetics.
This can be useful to speed up the fuzzing of text-based file formats.
- AFL_SHUFFLE_QUEUE randomly reorders the input queue on startup. Requested
by some users for unorthodox parallelized fuzzing setups, but not
advisable otherwise.
- When developing custom instrumentation on top of afl-fuzz, you can use
AFL_SKIP_BIN_CHECK to inhibit the checks for non-instrumented binaries
and shell scripts; and AFL_DUMB_FORKSRV in conjunction with the -n
setting to instruct afl-fuzz to still follow the fork server protocol
without expecting any instrumentation data in return.
- When running in the -M or -S mode, setting AFL_IMPORT_FIRST causes the
fuzzer to import test cases from other instances before doing anything
else. This makes the "own finds" counter in the UI more accurate.
Beyond counter aesthetics, not much else should change.
- Setting AFL_POST_LIBRARY allows you to configure a postprocessor for
mutated files - say, to fix up checksums. See experimental/post_library/
for more.
- AFL_FAST_CAL keeps the calibration stage about 2.5x faster (albeit less
precise), which can help when starting a session against a slow target.
- The CPU widget shown at the bottom of the screen is fairly simplistic and
may complain of high load prematurely, especially on systems with low core
counts. To avoid the alarming red color, you can set AFL_NO_CPU_RED.
- In QEMU mode (-Q), AFL_PATH will be searched for afl-qemu-trace.
- Setting AFL_PRELOAD causes AFL to set LD_PRELOAD for the target binary
without disrupting the afl-fuzz process itself. This is useful, among other
things, for bootstrapping libdislocator.so.
- Setting AFL_NO_UI inhibits the UI altogether, and just periodically prints
some basic stats. This behavior is also automatically triggered when the
output from afl-fuzz is redirected to a file or to a pipe.
- If you are Jakub, you may need AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES.
Others need not apply.
- Benchmarking only: AFL_BENCH_JUST_ONE causes the fuzzer to exit after
processing the first queue entry; and AFL_BENCH_UNTIL_CRASH causes it to
exit soon after the first crash is found.
4) Settings for afl-qemu-trace
------------------------------
The QEMU wrapper used to instrument binary-only code supports several settings:
- It is possible to set AFL_INST_RATIO to skip the instrumentation on some
of the basic blocks, which can be useful when dealing with very complex
binaries.
- Setting AFL_INST_LIBS causes the translator to also instrument the code
inside any dynamically linked libraries (notably including glibc).
- The underlying QEMU binary will recognize any standard "user space
emulation" variables (e.g., QEMU_STACK_SIZE), but there should be no
reason to touch them.
5) Settings for afl-cmin
------------------------
The corpus minimization script offers very little customization:
- Setting AFL_PATH offers a way to specify the location of afl-showmap
and afl-qemu-trace (the latter only in -Q mode).
- AFL_KEEP_TRACES makes the tool keep traces and other metadata used for
minimization and normally deleted at exit. The files can be found in the
<out_dir>/.traces/*.
- AFL_ALLOW_TMP permits this and some other scripts to run in /tmp. This is
a modest security risk on multi-user systems with rogue users, but should
be safe on dedicated fuzzing boxes.
6) Settings for afl-tmin
------------------------
Virtually nothing to play with. Well, in QEMU mode (-Q), AFL_PATH will be
searched for afl-qemu-trace. In addition to this, TMPDIR may be used if a
temporary file can't be created in the current working directory.
You can specify AFL_TMIN_EXACT if you want afl-tmin to require execution paths
to match when minimizing crashes. This will make minimization less useful, but
may prevent the tool from "jumping" from one crashing condition to another in
very buggy software. You probably want to combine it with the -e flag.
7) Settings for afl-analyze
---------------------------
You can set AFL_ANALYZE_HEX to get file offsets printed as hexadecimal instead
of decimal.
8) Settings for libdislocator.so
--------------------------------
The library honors four environmental variables:
- AFL_LD_LIMIT_MB caps the size of the maximum heap usage permitted by the
library, in megabytes. The default value is 1 GB. Once this is exceeded,
allocations will return NULL.
- AFL_LD_HARD_FAIL alters the behavior by calling abort() on excessive
allocations, thus causing what AFL would perceive as a crash. Useful for
programs that are supposed to maintain a specific memory footprint.
- AFL_LD_VERBOSE causes the library to output some diagnostic messages
that may be useful for pinpointing the cause of any observed issues.
- AFL_LD_NO_CALLOC_OVER inhibits abort() on calloc() overflows. Most
of the common allocators check for that internally and return NULL, so
it's a security risk only in more exotic setups.
9) Settings for libtokencap.so
------------------------------
This library accepts AFL_TOKEN_FILE to indicate the location to which the
discovered tokens should be written.
10) Third-party variables set by afl-fuzz & other tools
-------------------------------------------------------
Several variables are not directly interpreted by afl-fuzz, but are set to
optimal values if not already present in the environment:
- By default, LD_BIND_NOW is set to speed up fuzzing by forcing the
linker to do all the work before the fork server kicks in. You can
override this by setting LD_BIND_LAZY beforehand, but it is almost
certainly pointless.
- By default, ASAN_OPTIONS are set to:
abort_on_error=1
detect_leaks=0
symbolize=0
allocator_may_return_null=1
If you want to set your own options, be sure to include abort_on_error=1 -
otherwise, the fuzzer will not be able to detect crashes in the tested
app. Similarly, include symbolize=0, since without it, AFL may have
difficulty telling crashes and hangs apart.
- In the same vein, by default, MSAN_OPTIONS are set to:
exit_code=86 (required for legacy reasons)
abort_on_error=1
symbolize=0
msan_track_origins=0
allocator_may_return_null=1
Be sure to include the first one when customizing anything, since some
MSAN versions don't call abort() on error, and we need a way to detect
faults.

@ -0,0 +1,147 @@
================
Historical notes
================
This doc talks about the rationale of some of the high-level design decisions
for American Fuzzy Lop. It's adapted from a discussion with Rob Graham.
See README for the general instruction manual, and technical_details.txt for
additional implementation-level insights.
1) Influences
-------------
In short, afl-fuzz is inspired chiefly by the work done by Tavis Ormandy back
in 2007. Tavis did some very persuasive experiments using gcov block coverage
to select optimal test cases out of a large corpus of data, and then using
them as a starting point for traditional fuzzing workflows.
(By "persuasive", I mean: netting a significant number of interesting
vulnerabilities.)
In parallel to this, both Tavis and I were interested in evolutionary fuzzing.
Tavis had his experiments, and I was working on a tool called bunny-the-fuzzer,
released sometime in 2007.
Bunny used a generational algorithm not much different from afl-fuzz, but
also tried to reason about the relationship between various input bits and
the internal state of the program, with hopes of deriving some additional value
from that. The reasoning / correlation part was probably in part inspired by
other projects done around the same time by Will Drewry and Chris Evans.
The state correlation approach sounded very sexy on paper, but ultimately, made
the fuzzer complicated, brittle, and cumbersome to use; every other target
program would require a tweak or two. Because Bunny didn't fare a whole lot
better than less sophisticated brute-force tools, I eventually decided to write
it off. You can still find its original documentation at:
https://code.google.com/p/bunny-the-fuzzer/wiki/BunnyDoc
There has been a fair amount of independent work, too. Most notably, a few
weeks earlier that year, Jared DeMott had a Defcon presentation about a
coverage-driven fuzzer that relied on coverage as a fitness function.
Jared's approach was by no means identical to what afl-fuzz does, but it was in
the same ballpark. His fuzzer tried to explicitly solve for the maximum coverage
with a single input file; in comparison, afl simply selects for cases that do
something new (which yields better results - see technical_details.txt).
A few years later, Gabriel Campana released fuzzgrind, a tool that relied purely
on Valgrind and a constraint solver to maximize coverage without any brute-force
bits; and Microsoft Research folks talked extensively about their still
non-public, solver-based SAGE framework.
In the past six years or so, I've also seen a fair number of academic papers
that dealt with smart fuzzing (focusing chiefly on symbolic execution) and a
couple papers that discussed proof-of-concept applications of genetic
algorithms with the same goals in mind. I'm unconvinced how practical most of
these experiments were; I suspect that many of them suffer from the
bunny-the-fuzzer's curse of being cool on paper and in carefully designed
experiments, but failing the ultimate test of being able to find new,
worthwhile security bugs in otherwise well-fuzzed, real-world software.
In some ways, the baseline that the "cool" solutions have to compete against is
a lot more impressive than it may seem, making it difficult for competitors to
stand out. For a singular example, check out the work by Gynvael and Mateusz
Jurczyk, applying "dumb" fuzzing to ffmpeg, a prominent and security-critical
component of modern browsers and media players:
http://googleonlinesecurity.blogspot.com/2014/01/ffmpeg-and-thousand-fixes.html
Effortlessly getting comparable results with state-of-the-art symbolic execution
in equally complex software still seems fairly unlikely, and hasn't been
demonstrated in practice so far.
But I digress; ultimately, attribution is hard, and glorifying the fundamental
concepts behind AFL is probably a waste of time. The devil is very much in the
often-overlooked details, which brings us to...
2) Design goals for afl-fuzz
----------------------------
In short, I believe that the current implementation of afl-fuzz takes care of
several itches that seemed impossible to scratch with other tools:
1) Speed. It's genuinely hard to compete with brute force when your "smart"
approach is resource-intensive. If your instrumentation makes it 10x more
likely to find a bug, but runs 100x slower, your users are getting a bad
deal.
To avoid starting with a handicap, afl-fuzz is meant to let you fuzz most of
the intended targets at roughly their native speed - so even if it doesn't
add value, you do not lose much.
On top of this, the tool leverages instrumentation to actually reduce the
amount of work in a couple of ways: for example, by carefully trimming the
corpus or skipping non-functional but non-trimmable regions in the input
files.
2) Rock-solid reliability. It's hard to compete with brute force if your
approach is brittle and fails unexpectedly. Automated testing is attractive
because it's simple to use and scalable; anything that goes against these
principles is an unwelcome trade-off and means that your tool will be used
less often and with less consistent results.
Most of the approaches based on symbolic execution, taint tracking, or
complex syntax-aware instrumentation are currently fairly unreliable with
real-world targets. Perhaps more importantly, their failure modes can render
them strictly worse than "dumb" tools, and such degradation can be difficult
for less experienced users to notice and correct.
In contrast, afl-fuzz is designed to be rock solid, chiefly by keeping it
simple. In fact, at its core, it's designed to be just a very good
traditional fuzzer with a wide range of interesting, well-researched
strategies to go by. The fancy parts just help it focus the effort in
places where it matters the most.
3) Simplicity. The author of a testing framework is probably the only person
who truly understands the impact of all the settings offered by the tool -
and who can dial them in just right. Yet, even the most rudimentary fuzzer
frameworks often come with countless knobs and fuzzing ratios that need to
be guessed by the operator ahead of time. This can do more harm than
good.
AFL is designed to avoid this as much as possible. The three knobs you
can play with are the output file, the memory limit, and the ability to
override the default, auto-calibrated timeout. The rest is just supposed to
work. When it doesn't, user-friendly error messages outline the probable
causes and workarounds, and get you back on track right away.
4) Chainability. Most general-purpose fuzzers can't be easily employed
against resource-hungry or interaction-heavy tools, necessitating the
creation of custom in-process fuzzers or the investment of massive CPU
power (most of which is wasted on tasks not directly related to the code
we actually want to test).
AFL tries to scratch this itch by allowing users to use more lightweight
targets (e.g., standalone image parsing libraries) to create small
corpora of interesting test cases that can be fed into a manual testing
process or a UI harness later on.
As mentioned in technical_details.txt, AFL does all this not by systematically
applying a single overarching CS concept, but by experimenting with a variety
of small, complementary methods that were shown to reliably yield results
better than chance. The use of instrumentation is a part of that toolkit, but is
far from being the most important one.
Ultimately, what matters is that afl-fuzz is designed to find cool bugs - and
has a pretty robust track record of doing just that.

@ -0,0 +1,128 @@
# ===================
# AFL "Life Pro Tips"
# ===================
#
# Bite-sized advice for those who understand the basics, but can't be bothered
# to read or memorize every other piece of documentation for AFL.
#
%
Get more bang for your buck by using fuzzing dictionaries.
See dictionaries/README.dictionaries to learn how.
%
You can get the most out of your hardware by parallelizing AFL jobs.
See docs/parallel_fuzzing.txt for step-by-step tips.
%
Improve the odds of spotting memory corruption bugs with libdislocator.so!
It's easy. Consult libdislocator/README.dislocator for usage tips.
%
Want to understand how your target parses a particular input file?
Try the bundled afl-analyze tool; it's got colors and all!
%
You can visually monitor the progress of your fuzzing jobs.
Run the bundled afl-plot utility to generate browser-friendly graphs.
%
Need to monitor AFL jobs programmatically? Check out the fuzzer_stats file
in the AFL output dir or try afl-whatsup.
%
Puzzled by something showing up in red or purple in the AFL UI?
It could be important - consult docs/status_screen.txt right away!
%
Know your target? Convert it to persistent mode for a huge performance gain!
Consult section #5 in llvm_mode/README.llvm for tips.
%
Using clang? Check out llvm_mode/ for a faster alternative to afl-gcc!
%
Did you know that AFL can fuzz closed-source or cross-platform binaries?
Check out qemu_mode/README.qemu for more.
%
Did you know that afl-fuzz can minimize any test case for you?
Try the bundled afl-tmin tool - and get small repro files fast!
%
Not sure if a crash is exploitable? AFL can help you figure it out. Specify
-C to enable the peruvian were-rabbit mode. See section #10 in README for more.
%
Trouble dealing with a machine uprising? Relax, we've all been there.
Find essential survival tips at http://lcamtuf.coredump.cx/prep/.
%
AFL-generated corpora can be used to power other testing processes.
See section #2 in README for inspiration - it tends to pay off!
%
Want to automatically spot non-crashing memory handling bugs?
Try running an AFL-generated corpus through ASAN, MSAN, or Valgrind.
%
Good selection of input files is critical to a successful fuzzing job.
See section #5 in README (or docs/perf_tips.txt) for pro tips.
%
You can improve the odds of automatically spotting stack corruption issues.
Specify AFL_HARDEN=1 in the environment to enable hardening flags.
%
Bumping into problems with non-reproducible crashes? It happens, but usually
isn't hard to diagnose. See section #7 in README for tips.
%
Fuzzing is not just about memory corruption issues in the codebase. Add some
sanity-checking assert() / abort() statements to effortlessly catch logic bugs.
%
Hey kid... pssst... want to figure out how AFL really works?
Check out docs/technical_details.txt for all the gory details in one place!
%
There's a ton of third-party helper tools designed to work with AFL!
Be sure to check out docs/sister_projects.txt before writing your own.
%
Need to fuzz the command-line arguments of a particular program?
You can find a simple solution in experimental/argv_fuzzing.
%
Attacking a format that uses checksums? Remove the checksum-checking code or
use a postprocessor! See experimental/post_library/ for more.
%
Dealing with a very slow target or hoping for instant results? Specify -d
when calling afl-fuzz!
%

@ -0,0 +1,143 @@
==================================
Notes for using ASAN with afl-fuzz
==================================
This file discusses some of the caveats for fuzzing under ASAN, and suggests
a handful of alternatives. See README for the general instruction manual.
1) Short version
----------------
ASAN on 64-bit systems requests a lot of memory in a way that can't be easily
distinguished from a misbehaving program bent on crashing your system.
Because of this, fuzzing with ASAN is recommended only in four scenarios:
- On 32-bit systems, where we can always enforce a reasonable memory limit
(-m 800 or so is a good starting point),
- On 64-bit systems only if you can do one of the following:
- Compile the binary in 32-bit mode (gcc -m32),
- Precisely gauge memory needs using http://jwilk.net/software/recidivm .
- Limit the memory available to process using cgroups on Linux (see
experimental/asan_cgroups).
To compile with ASAN, set AFL_USE_ASAN=1 before calling 'make clean all'. The
afl-gcc / afl-clang wrappers will pick that up and add the appropriate flags.
Note that ASAN is incompatible with -static, so be mindful of that.
(You can also use AFL_USE_MSAN=1 to enable MSAN instead.)
There is also the option of generating a corpus using a non-ASAN binary, and
then feeding it to an ASAN-instrumented one to check for bugs. This is faster,
and can give you somewhat comparable results. You can also try using
libdislocator (see libdislocator/README.dislocator in the parent directory) as a
lightweight and hassle-free (but less thorough) alternative.
2) Long version
---------------
ASAN allocates a huge region of virtual address space for bookkeeping purposes.
Most of this is never actually accessed, so the OS never has to allocate any
real pages of memory for the process, and the VM grabbed by ASAN is essentially
"free" - but the mapping counts against the standard OS-enforced limit
(RLIMIT_AS, aka ulimit -v).
On our end, afl-fuzz tries to protect you from processes that go off-rails
and start consuming all the available memory in a vain attempt to parse a
malformed input file. This happens surprisingly often, so enforcing such a limit
is important for almost any fuzzer: the alternative is for the kernel OOM
handler to step in and start killing random processes to free up resources.
Needless to say, that's not a very nice prospect to live with.
Unfortunately, un*x systems offer no portable way to limit the amount of
pages actually given to a process in a way that distinguishes between that
and the harmless "land grab" done by ASAN. In principle, there are three standard
ways to limit the size of the heap:
- The RLIMIT_AS mechanism (ulimit -v) caps the size of the virtual space -
but as noted, this pays no attention to the number of pages actually
in use by the process, and doesn't help us here.
- The RLIMIT_DATA mechanism (ulimit -d) seems like a good fit, but it applies
only to the traditional sbrk() / brk() methods of requesting heap space;
modern allocators, including the one in glibc, routinely rely on mmap()
instead, and circumvent this limit completely.
- Finally, the RLIMIT_RSS limit (ulimit -m) sounds like what we need, but
doesn't work on Linux - mostly because nobody felt like implementing it.
There are also cgroups, but they are Linux-specific, not universally available
even on Linux systems, and they require root permissions to set up; I'm a bit
hesitant to make afl-fuzz require root permissions just for that. That said,
if you are on Linux and want to use cgroups, check out the contributed script
that ships in experimental/asan_cgroups/.
In settings where cgroups aren't available, we have no nice, portable way to
avoid counting the ASAN allocation toward the limit. On 32-bit systems, or for
binaries compiled in 32-bit mode (-m32), this is not a big deal: ASAN needs
around 600-800 MB or so, depending on the compiler - so all you need to do is
to specify -m that is a bit higher than that.
On 64-bit systems, the situation is more murky, because the ASAN allocation
is completely outlandish - around 17.5 TB in older versions, and closer to
20 TB with newest ones. The actual amount of memory on your system is
(probably!) just a tiny fraction of that - so unless you dial the limit
with surgical precision, you will get no protection from OOM bugs.
On my system, the amount of memory grabbed by ASAN with a slightly older
version of gcc is around 17,825,850 MB; for newest clang, it's 20,971,600.
But there is no guarantee that these numbers are stable, and if you get them
wrong by "just" a couple gigs or so, you will be at risk.
To get the precise number, you can use the recidivm tool developed by Jakub
Wilk (http://jwilk.net/software/recidivm). In absence of this, ASAN is *not*
recommended when fuzzing 64-bit binaries, unless you are confident that they
are robust and enforce reasonable memory limits (in which case, you can
specify '-m none' when calling afl-fuzz).
Using recidivm or running with no limits aside, there are two other decent
alternatives: build a corpus of test cases using a non-ASAN binary, and then
examine them with ASAN, Valgrind, or other heavy-duty tools in a more
controlled setting; or compile the target program with -m32 (32-bit mode)
if your system supports that.
3) Interactions with the QEMU mode
----------------------------------
ASAN, MSAN, and other sanitizers appear to be incompatible with QEMU user
emulation, so please do not try to use them with the -Q option; QEMU doesn't
seem to appreciate the shadow VM trick used by these tools, and will likely
just allocate all your physical memory, then crash.
4) ASAN and OOM crashes
-----------------------
By default, ASAN treats memory allocation failures as fatal errors, immediately
causing the program to crash. Since this is a departure from normal POSIX
semantics (and creates the appearance of security issues in otherwise
properly-behaving programs), we try to disable this by specifying
allocator_may_return_null=1 in ASAN_OPTIONS.
Unfortunately, it's been reported that this setting still causes ASAN to
trigger phantom crashes in situations where the standard allocator would
simply return NULL. If this is interfering with your fuzzing jobs, you may
want to cc: yourself on this bug:
https://bugs.llvm.org/show_bug.cgi?id=22026
5) What about UBSAN?
--------------------
Some folks expressed interest in fuzzing with UBSAN. This isn't officially
supported, because many installations of UBSAN don't offer a consistent way
to abort() on fault conditions or to terminate with a distinctive exit code.
That said, some versions of the library can be binary-patched to address this
issue, while newer releases support explicit compile-time flags - see this
mailing list thread for tips:
https://groups.google.com/forum/#!topic/afl-users/GyeSBJt4M38

@ -0,0 +1,216 @@
=========================
Tips for parallel fuzzing
=========================
This document talks about synchronizing afl-fuzz jobs on a single machine
or across a fleet of systems. See README for the general instruction manual.
1) Introduction
---------------
Every copy of afl-fuzz will take up one CPU core. This means that on an
n-core system, you can almost always run around n concurrent fuzzing jobs with
virtually no performance hit (you can use the afl-gotcpu tool to make sure).
In fact, if you rely on just a single job on a multi-core system, you will
be underutilizing the hardware. So, parallelization is usually the right
way to go.
When targeting multiple unrelated binaries or using the tool in "dumb" (-n)
mode, it is perfectly fine to just start up several fully separate instances
of afl-fuzz. The picture gets more complicated when you want to have multiple
fuzzers hammering a common target: if a hard-to-hit but interesting test case
is synthesized by one fuzzer, the remaining instances will not be able to use
that input to guide their work.
To help with this problem, afl-fuzz offers a simple way to synchronize test
cases on the fly.
2) Single-system parallelization
--------------------------------
If you wish to parallelize a single job across multiple cores on a local
system, simply create a new, empty output directory ("sync dir") that will be
shared by all the instances of afl-fuzz; and then come up with a naming scheme
for every instance - say, "fuzzer01", "fuzzer02", etc.
Run the first one ("master", -M) like this:
$ ./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 [...other stuff...]
...and then, start up secondary (-S) instances like this:
$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 [...other stuff...]
$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer03 [...other stuff...]
Each fuzzer will keep its state in a separate subdirectory, like so:
/path/to/sync_dir/fuzzer01/
Each instance will also periodically rescan the top-level sync directory
for any test cases found by other fuzzers - and will incorporate them into
its own fuzzing when they are deemed interesting enough.
The difference between the -M and -S modes is that the master instance will
still perform deterministic checks; while the secondary instances will
proceed straight to random tweaks. If you don't want to do deterministic
fuzzing at all, it's OK to run all instances with -S. With very slow or complex
targets, or when running heavily parallelized jobs, this is usually a good plan.
Note that running multiple -M instances is wasteful, although there is
experimental support for parallelizing the deterministic checks. To leverage
that, you need to create -M instances like so:
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterA:1/3 [...]
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterB:2/3 [...]
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterC:3/3 [...]
...where the first value after ':' is the sequential ID of a particular master
instance (starting at 1), and the second value is the total number of fuzzers to
distribute the deterministic fuzzing across. Note that if you boot up fewer
fuzzers than indicated by the second number passed to -M, you may end up with
poor coverage.
You can also monitor the progress of your jobs from the command line with the
provided afl-whatsup tool. When the instances are no longer finding new paths,
it's probably time to stop.
WARNING: Exercise caution when explicitly specifying the -f option. Each fuzzer
must use a separate temporary file; otherwise, things will go south. One safe
example may be:
$ ./afl-fuzz [...] -S fuzzer10 -f file10.txt ./fuzzed/binary @@
$ ./afl-fuzz [...] -S fuzzer11 -f file11.txt ./fuzzed/binary @@
$ ./afl-fuzz [...] -S fuzzer12 -f file12.txt ./fuzzed/binary @@
This is not a concern if you use @@ without -f and let afl-fuzz come up with the
file name.
3) Multi-system parallelization
-------------------------------
The basic operating principle for multi-system parallelization is similar to
the mechanism explained in section 2. The key difference is that you need to
write a simple script that performs two actions:
- Uses SSH with authorized_keys to connect to every machine and retrieve
a tar archive of the /path/to/sync_dir/<fuzzer_id>/queue/ directories for
every <fuzzer_id> local to the machine. It's best to use a naming scheme
that includes host name in the fuzzer ID, so that you can do something
like:
for s in {1..10}; do
ssh user@host${s} "tar -czf - sync/host${s}_fuzzid*/[qf]*" >host${s}.tgz
done
- Distributes and unpacks these files on all the remaining machines, e.g.:
for s in {1..10}; do
for d in {1..10}; do
test "$s" = "$d" && continue
ssh user@host${d} 'tar -kxzf -' <host${s}.tgz
done
done
There is an example of such a script in experimental/distributed_fuzzing/;
you can also find a more featured, experimental tool developed by
Martijn Bogaard at:
https://github.com/MartijnB/disfuzz-afl
Another client-server implementation from Richo Healey is:
https://github.com/richo/roving
Note that these third-party tools are unsafe to run on systems exposed to the
Internet or to untrusted users.
When developing custom test case sync code, there are several optimizations
to keep in mind:
- The synchronization does not have to happen very often; running the
task every 30 minutes or so may be perfectly fine.
- There is no need to synchronize crashes/ or hangs/; you only need to
copy over queue/* (and ideally, also fuzzer_stats).
- It is not necessary (and not advisable!) to overwrite existing files;
the -k option in tar is a good way to avoid that.
- There is no need to fetch directories for fuzzers that are not running
locally on a particular machine, and were simply copied over onto that
system during earlier runs.
- For large fleets, you will want to consolidate tarballs for each host,
as this will let you use n SSH connections for sync, rather than n*(n-1).
You may also want to implement staged synchronization. For example, you
could have 10 groups of systems, with group 1 pushing test cases only
to group 2; group 2 pushing them only to group 3; and so on, with group
10 eventually feeding back to group 1.
This arrangement would allow interesting test cases to propagate across
the fleet without having to copy every fuzzer queue to every single host.
- You do not want a "master" instance of afl-fuzz on every system; you should
run them all with -S, and just designate a single process somewhere within
the fleet to run with -M.
It is *not* advisable to skip the synchronization script and run the fuzzers
directly on a network filesystem; unexpected latency and unkillable processes
in I/O wait state can mess things up.
4) Remote monitoring and data collection
----------------------------------------
You can use screen, nohup, tmux, or something equivalent to run remote
instances of afl-fuzz. If you redirect the program's output to a file, it will
automatically switch from a fancy UI to more limited status reports. There is
also basic machine-readable information always written to the fuzzer_stats file
in the output directory. Locally, that information can be interpreted with
afl-whatsup.
In principle, you can use the status screen of the master (-M) instance to
monitor the overall fuzzing progress and decide when to stop. In this
mode, the most important signal is just that no new paths are being found
for a long while. If you do not have a master instance, just pick any
single secondary instance to watch and go by that.
You can also rely on that instance's output directory to collect the
synthesized corpus that covers all the noteworthy paths discovered anywhere
within the fleet. Secondary (-S) instances do not require any special
monitoring, other than just making sure that they are up.
Keep in mind that crashing inputs are *not* automatically propagated to the
master instance, so you may still want to monitor for crashes fleet-wide
from within your synchronization or health checking scripts (see afl-whatsup).
5) Asymmetric setups
--------------------
It is perhaps worth noting that all of the following is permitted:
- Running afl-fuzz in conjunction with other guided tools that can extend
coverage (e.g., via concolic execution). Third-party tools simply need to
follow the protocol described above for pulling new test cases from
out_dir/<fuzzer_id>/queue/* and writing their own finds to sequentially
numbered id:nnnnnn files in out_dir/<ext_tool_id>/queue/*.
- Running some of the synchronized fuzzers with different (but related)
target binaries. For example, simultaneously stress-testing several
different JPEG parsers (say, IJG jpeg and libjpeg-turbo) while sharing
the discovered test cases can have synergistic effects and improve the
overall coverage.
(In this case, running one -M instance per binary is a good plan.)
- Having some of the fuzzers invoke the binary in different ways.
For example, 'djpeg' supports several DCT modes, configurable with
a command-line flag, while 'dwebp' supports incremental and one-shot
decoding. In some scenarios, going after multiple distinct modes and then
pooling test cases will improve coverage.
- Much less convincingly, running the synchronized fuzzers with different
starting test cases (e.g., progressive and standard JPEG) or dictionaries.
The synchronization mechanism ensures that the test sets will get fairly
homogeneous over time, but it introduces some initial variability.

@ -0,0 +1,201 @@
=================================
Tips for performance optimization
=================================
This file provides tips for troubleshooting slow or wasteful fuzzing jobs.
See README for the general instruction manual.
1) Keep your test cases small
-----------------------------
This is probably the single most important step to take! Large test cases do
not merely take more time and memory to be parsed by the tested binary, but
also make the fuzzing process dramatically less efficient in several other
ways.
To illustrate, let's say that you're randomly flipping bits in a file, one bit
at a time. Let's assume that if you flip bit #47, you will hit a security bug;
flipping any other bit just results in an invalid document.
Now, if your starting test case is 100 bytes long, you will have a 71% chance of
triggering the bug within the first 1,000 execs - not bad! But if the test case
is 1 kB long, the probability that we will randomly hit the right pattern in
the same timeframe goes down to 11%. And if it has 10 kB of non-essential
cruft, the odds plunge to 1%.
On top of that, with larger inputs, the binary may now be running 5-10x
slower than before - so the overall drop in fuzzing efficiency may be easily
as high as 500x or so.
In practice, this means that you shouldn't fuzz image parsers with your
vacation photos. Generate a tiny 16x16 picture instead, and run it through
jpegtran or pngcrush for good measure. The same goes for most other types
of documents.
There are plenty of small starting test cases in ../testcases/* - try them out
or submit new ones!
If you want to start with a larger, third-party corpus, run afl-cmin with an
aggressive timeout on that data set first.
2) Use a simpler target
-----------------------
Consider using a simpler target binary in your fuzzing work. For example, for
image formats, bundled utilities such as djpeg, readpng, or gifhisto are
considerably (10-20x) faster than the convert tool from ImageMagick - all while
exercising roughly the same library-level image parsing code.
Even if you don't have a lightweight harness for a particular target, remember
that you can always use another, related library to generate a corpus that will
be then manually fed to a more resource-hungry program later on.
3) Use LLVM instrumentation
---------------------------
When fuzzing slow targets, you can gain a 2x performance improvement by using
the LLVM-based instrumentation mode described in llvm_mode/README.llvm. Note
that this mode requires the use of clang and will not work with GCC.
The LLVM mode also offers a "persistent", in-process fuzzing mode that can
work well for certain types of self-contained libraries, and for fast targets,
can offer performance gains up to 5-10x; and a "deferred fork server" mode
that can offer huge benefits for programs with high startup overhead. Both
modes require you to edit the source code of the fuzzed program, but the
changes often amount to just strategically placing a single line or two.
4) Profile and optimize the binary
----------------------------------
Check for any parameters or settings that obviously improve performance. For
example, the djpeg utility that comes with IJG jpeg and libjpeg-turbo can be
called with:
-dct fast -nosmooth -onepass -dither none -scale 1/4
...and that will speed things up. There is a corresponding drop in the quality
of decoded images, but it's probably not something you care about.
In some programs, it is possible to disable output altogether, or at least use
an output format that is computationally inexpensive. For example, with image
transcoding tools, converting to a BMP file will be a lot faster than to PNG.
With some laid-back parsers, enabling "strict" mode (i.e., bailing out after
first error) may result in smaller files and improved run time without
sacrificing coverage; for example, for sqlite, you may want to specify -bail.
If the program is still too slow, you can use strace -tt or an equivalent
profiling tool to see if the targeted binary is doing anything silly.
Sometimes, you can speed things up simply by specifying /dev/null as the
config file, or disabling some compile-time features that aren't really needed
for the job (try ./configure --help). One of the notoriously resource-consuming
things would be calling other utilities via exec*(), popen(), system(), or
equivalent calls; for example, tar can invoke external decompression tools
when it decides that the input file is a compressed archive.
Some programs may also intentionally call sleep(), usleep(), or nanosleep();
vim is a good example of that. Other programs may attempt fsync() and so on.
There are third-party libraries that make it easy to get rid of such code,
e.g.:
https://launchpad.net/libeatmydata
In programs that are slow due to unavoidable initialization overhead, you may
want to try the LLVM deferred forkserver mode (see llvm_mode/README.llvm),
which can give you speed gains up to 10x, as mentioned above.
Last but not least, if you are using ASAN and the performance is unacceptable,
consider turning it off for now, and manually examining the generated corpus
with an ASAN-enabled binary later on.
5) Instrument just what you need
--------------------------------
Instrument just the libraries you actually want to stress-test right now, one
at a time. Let the program use system-wide, non-instrumented libraries for
any functionality you don't actually want to fuzz. For example, in most
cases, it doesn't make sense to instrument libgmp just because you're testing a
crypto app that relies on it for bignum math.
Beware of programs that come with oddball third-party libraries bundled with
their source code (Spidermonkey is a good example of this). Check ./configure
options to use non-instrumented system-wide copies instead.
6) Parallelize your fuzzers
---------------------------
The fuzzer is designed to need ~1 core per job. This means that on a, say,
4-core system, you can easily run four parallel fuzzing jobs with relatively
little performance hit. For tips on how to do that, see parallel_fuzzing.txt.
The afl-gotcpu utility can help you understand if you still have idle CPU
capacity on your system. (It won't tell you about memory bandwidth, cache
misses, or similar factors, but they are less likely to be a concern.)
7) Keep memory use and timeouts in check
----------------------------------------
If you have increased the -m or -t limits more than truly necessary, consider
dialing them back down.
For programs that are nominally very fast, but get sluggish for some inputs,
you can also try setting -t values that are more punishing than what afl-fuzz
dares to use on its own. On fast and idle machines, going down to -t 5 may be
a viable plan.
The -m parameter is worth looking at, too. Some programs can end up spending
a fair amount of time allocating and initializing megabytes of memory when
presented with pathological inputs. Low -m values can make them give up sooner
and not waste CPU time.
8) Check OS configuration
-------------------------
There are several OS-level factors that may affect fuzzing speed:
- High system load. Use idle machines where possible. Kill any non-essential
CPU hogs (idle browser windows, media players, complex screensavers, etc).
- Network filesystems, either used for fuzzer input / output, or accessed by
the fuzzed binary to read configuration files (pay special attention to the
home directory - many programs search it for dot-files).
- On-demand CPU scaling. The Linux 'ondemand' governor performs its analysis
on a particular schedule and is known to underestimate the needs of
short-lived processes spawned by afl-fuzz (or any other fuzzer). On Linux,
this can be fixed with:
cd /sys/devices/system/cpu
echo performance | tee cpu*/cpufreq/scaling_governor
On other systems, the impact of CPU scaling will be different; when fuzzing,
use OS-specific tools to find out if all cores are running at full speed.
- Transparent huge pages. Some allocators, such as jemalloc, can incur a
heavy fuzzing penalty when transparent huge pages (THP) are enabled in the
kernel. You can disable this via:
echo never > /sys/kernel/mm/transparent_hugepage/enabled
- Suboptimal scheduling strategies. The significance of this will vary from
one target to another, but on Linux, you may want to make sure that the
following options are set:
echo 1 >/proc/sys/kernel/sched_child_runs_first
echo 1 >/proc/sys/kernel/sched_autogroup_enabled
Setting a different scheduling policy for the fuzzer process - say
SCHED_RR - can usually speed things up, too, but needs to be done with
care.
9) If all other options fail, use -d
------------------------------------
For programs that are genuinely slow, in cases where you really can't escape
using huge input files, or when you simply want to get quick and dirty results
early on, you can always resort to the -d mode.
The mode causes afl-fuzz to skip all the deterministic fuzzing steps, which
makes output a lot less neat and can ultimately make the testing a bit less
in-depth, but it will give you an experience more familiar from other fuzzing
tools.

@ -0,0 +1,354 @@
===============
Sister projects
===============
This doc lists some of the projects that are inspired by, derived from,
designed for, or meant to integrate with AFL. See README for the general
instruction manual.
-------------------------------------------
Support for other languages / environments:
-------------------------------------------
Python AFL (Jakub Wilk)
-----------------------
Allows fuzz-testing of Python programs. Uses custom instrumentation and its
own forkserver.
http://jwilk.net/software/python-afl
Go-fuzz (Dmitry Vyukov)
-----------------------
AFL-inspired guided fuzzing approach for Go targets:
https://github.com/dvyukov/go-fuzz
afl.rs (Keegan McAllister)
--------------------------
Allows Rust features to be easily fuzzed with AFL (using the LLVM mode).
https://github.com/kmcallister/afl.rs
OCaml support (KC Sivaramakrishnan)
-----------------------------------
Adds AFL-compatible instrumentation to OCaml programs.
https://github.com/ocamllabs/opam-repo-dev/pull/23
http://canopy.mirage.io/Posts/Fuzzing
AFL for GCJ Java and other GCC frontends (-)
--------------------------------------------
GCC Java programs are actually supported out of the box - simply rename
afl-gcc to afl-gcj. Unfortunately, by default, unhandled exceptions in GCJ do
not result in abort() being called, so you will need to manually add a
top-level exception handler that exits with SIGABRT or something equivalent.
Other GCC-supported languages should be fairly easy to get working, but may
face similar problems. See https://gcc.gnu.org/frontends.html for a list of
options.
AFL-style in-process fuzzer for LLVM (Kostya Serebryany)
--------------------------------------------------------
Provides an evolutionary instrumentation-guided fuzzing harness that allows
some programs to be fuzzed without the fork / execve overhead. (Similar
functionality is now available as the "persistent" feature described in
../llvm_mode/README.llvm.)
http://llvm.org/docs/LibFuzzer.html
AFL fixup shim (Ben Nagy)
-------------------------
Allows AFL_POST_LIBRARY postprocessors to be written in arbitrary languages
that don't have C / .so bindings. Includes examples in Go.
https://github.com/bnagy/aflfix
TriforceAFL (Tim Newsham and Jesse Hertz)
-----------------------------------------
Leverages QEMU full system emulation mode to allow AFL to target operating
systems and other alien worlds:
https://www.nccgroup.trust/us/about-us/newsroom-and-events/blog/2016/june/project-triforce-run-afl-on-everything/
WinAFL (Ivan Fratric)
---------------------
As the name implies, allows you to fuzz Windows binaries (using DynamoRio).
https://github.com/ivanfratric/winafl
Another Windows alternative may be:
https://github.com/carlosgprado/BrundleFuzz/
----------------
Network fuzzing:
----------------
Preeny (Yan Shoshitaishvili)
----------------------------
Provides a fairly simple way to convince dynamically linked network-centric
programs to read from a file or not fork. Not AFL-specific, but described as
useful by many users. Some assembly required.
https://github.com/zardus/preeny
-------------------------------------------
Distributed fuzzing and related automation:
-------------------------------------------
roving (Richo Healey)
---------------------
A client-server architecture for effortlessly orchestrating AFL runs across
a fleet of machines. You don't want to use this on systems that face the
Internet or live in other untrusted environments.
https://github.com/richo/roving
Distfuzz-AFL (Martijn Bogaard)
------------------------------
Simplifies the management of afl-fuzz instances on remote machines. The
author notes that the current implementation isn't secure and should not
be exposed on the Internet.
https://github.com/MartijnB/disfuzz-afl
AFLDFF (quantumvm)
------------------
A nice GUI for managing AFL jobs.
https://github.com/quantumvm/AFLDFF
afl-launch (Ben Nagy)
---------------------
Batch AFL launcher utility with a simple CLI.
https://github.com/bnagy/afl-launch
AFL Utils (rc0r)
----------------
Simplifies the triage of discovered crashes, starting parallel instances, etc.
https://github.com/rc0r/afl-utils
Another crash triage tool:
https://github.com/floyd-fuh/afl-crash-analyzer
afl-fuzzing-scripts (Tobias Ospelt)
-----------------------------------
Simplifies starting up multiple parallel AFL jobs.
https://github.com/floyd-fuh/afl-fuzzing-scripts/
afl-sid (Jacek Wielemborek)
---------------------------
Allows users to more conveniently build and deploy AFL via Docker.
https://github.com/d33tah/afl-sid
Another Docker-related project:
https://github.com/ozzyjohnson/docker-afl
afl-monitor (Paul S. Ziegler)
-----------------------------
Provides more detailed and versatile statistics about your running AFL jobs.
https://github.com/reflare/afl-monitor
-----------------------------------------------------------
Crash triage, coverage analysis, and other companion tools:
-----------------------------------------------------------
afl-crash-analyzer (Tobias Ospelt)
----------------------------------
Makes it easier to navigate and annotate crashing test cases.
https://github.com/floyd-fuh/afl-crash-analyzer/
Crashwalk (Ben Nagy)
--------------------
AFL-aware tool to annotate and sort through crashing test cases.
https://github.com/bnagy/crashwalk
afl-cov (Michael Rash)
----------------------
Produces human-readable coverage data based on the output queue of afl-fuzz.
https://github.com/mrash/afl-cov
afl-sancov (Bhargava Shastry)
-----------------------------
Similar to afl-cov, but uses clang sanitizer instrumentation.
https://github.com/bshastry/afl-sancov
RecidiVM (Jakub Wilk)
---------------------
Makes it easy to estimate memory usage limits when fuzzing with ASAN or MSAN.
http://jwilk.net/software/recidivm
aflize (Jacek Wielemborek)
--------------------------
Automatically builds AFL-enabled versions of Debian packages.
https://github.com/d33tah/aflize
afl-ddmin-mod (Markus Teufelberger)
-----------------------------------
A variant of afl-tmin that uses a more sophisticated (but slower)
minimization algorithm.
https://github.com/MarkusTeufelberger/afl-ddmin-mod
afl-kit (Kuang-che Wu)
----------------------
Replacements for afl-cmin and afl-tmin with additional features, such
as the ability to filter crashes based on stderr patterns.
https://github.com/kcwu/afl-kit
-------------------------------
Narrow-purpose or experimental:
-------------------------------
Cygwin support (Ali Rizvi-Santiago)
-----------------------------------
Pretty self-explanatory. As per the author, this "mostly" ports AFL to
Windows. Field reports welcome!
https://github.com/arizvisa/afl-cygwin
Pause and resume scripts (Ben Nagy)
-----------------------------------
Simple automation to suspend and resume groups of fuzzing jobs.
https://github.com/bnagy/afl-trivia
Static binary-only instrumentation (Aleksandar Nikolich)
--------------------------------------------------------
Allows black-box binaries to be instrumented statically (i.e., by modifying
the binary ahead of time, rather than translating it on the run). Author
reports better performance compared to QEMU, but occasional translation
errors with stripped binaries.
https://github.com/vrtadmin/moflow/tree/master/afl-dyninst
AFL PIN (Parker Thompson)
-------------------------
Early-stage Intel PIN instrumentation support (from before we settled on
faster-running QEMU).
https://github.com/mothran/aflpin
AFL-style instrumentation in llvm (Kostya Serebryany)
-----------------------------------------------------
Allows AFL-equivalent instrumentation to be injected at compiler level.
This is currently not supported by AFL as-is, but may be useful in other
projects.
https://code.google.com/p/address-sanitizer/wiki/AsanCoverage#Coverage_counters
AFL JS (Han Choongwoo)
----------------------
One-off optimizations to speed up the fuzzing of JavaScriptCore (now likely
superseded by LLVM deferred forkserver init - see llvm_mode/README.llvm).
https://github.com/tunz/afl-fuzz-js
AFL harness for fwknop (Michael Rash)
-------------------------------------
An example of a fairly involved integration with AFL.
https://github.com/mrash/fwknop/tree/master/test/afl
Building harnesses for DNS servers (Jonathan Foote, Ron Bowes)
--------------------------------------------------------------
Two articles outlining the general principles and showing some example code.
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
https://goo.gl/j9EgFf
Fuzzer shell for SQLite (Richard Hipp)
--------------------------------------
A simple SQL shell designed specifically for fuzzing the underlying library.
http://www.sqlite.org/src/artifact/9e7e273da2030371
Support for Python mutation modules (Christian Holler)
------------------------------------------------------
https://github.com/choller/afl/blob/master/docs/mozilla/python_modules.txt
Support for selective instrumentation (Christian Holler)
--------------------------------------------------------
https://github.com/choller/afl/blob/master/docs/mozilla/partial_instrumentation.txt
Kernel fuzzing (Dmitry Vyukov)
------------------------------
A similar guided approach as applied to fuzzing syscalls:
https://github.com/google/syzkaller/wiki/Found-Bugs
https://github.com/dvyukov/linux/commit/33787098ffaaa83b8a7ccf519913ac5fd6125931
http://events.linuxfoundation.org/sites/events/files/slides/AFL%20filesystem%20fuzzing%2C%20Vault%202016_0.pdf
Android support (ele7enxxh)
---------------------------
Based on a somewhat dated version of AFL:
https://github.com/ele7enxxh/android-afl
CGI wrapper (floyd)
-------------------
Facilitates the testing of CGI scripts.
https://github.com/floyd-fuh/afl-cgi-wrapper
Fuzzing difficulty estimation (Marcel Boehme)
---------------------------------------------
A fork of AFL that tries to quantify the likelihood of finding additional
paths or crashes at any point in a fuzzing job.
https://github.com/mboehme/pythia

@ -0,0 +1,408 @@
===============================
Understanding the status screen
===============================
This document provides an overview of the status screen - plus tips for
troubleshooting any warnings and red text shown in the UI. See README for
the general instruction manual.
0) A note about colors
----------------------
The status screen and error messages use colors to keep things readable and
attract your attention to the most important details. For example, red almost
always means "consult this doc" :-)
Unfortunately, the UI will render correctly only if your terminal is using
a traditional un*x palette (white text on black background) or something close
to that.
If you are using inverse video, you may want to change your settings, say:
- For GNOME Terminal, go to Edit > Profile preferences, select the "colors"
tab, and from the list of built-in schemes, choose "white on black".
- For the MacOS X Terminal app, open a new window using the "Pro" scheme via
the Shell > New Window menu (or make "Pro" your default).
Alternatively, if you really like your current colors, you can edit config.h
to comment out USE_COLORS, then do 'make clean all'.
I'm not aware of any other simple way to make this work without causing
other side effects - sorry about that.
With that out of the way, let's talk about what's actually on the screen...
1) Process timing
-----------------
+----------------------------------------------------+
| run time : 0 days, 8 hrs, 32 min, 43 sec |
| last new path : 0 days, 0 hrs, 6 min, 40 sec |
| last uniq crash : none seen yet |
| last uniq hang : 0 days, 1 hrs, 24 min, 32 sec |
+----------------------------------------------------+
This section is fairly self-explanatory: it tells you how long the fuzzer has
been running and how much time has elapsed since its most recent finds. This is
broken down into "paths" (a shorthand for test cases that trigger new execution
patterns), crashes, and hangs.
When it comes to timing: there is no hard rule, but most fuzzing jobs should be
expected to run for days or weeks; in fact, for a moderately complex project, the
first pass will probably take a day or so. Every now and then, some jobs
will be allowed to run for months.
There's one important thing to watch out for: if the tool is not finding new
paths within several minutes of starting, you're probably not invoking the
target binary correctly and it never gets to parse the input files we're
throwing at it; other possible explanations are that the default memory limit
(-m) is too restrictive, and the program exits after failing to allocate a
buffer very early on; or that the input files are patently invalid and always
fail a basic header check.
If there are no new paths showing up for a while, you will eventually see a big
red warning in this section, too :-)
2) Overall results
------------------
+-----------------------+
| cycles done : 0 |
| total paths : 2095 |
| uniq crashes : 0 |
| uniq hangs : 19 |
+-----------------------+
The first field in this section gives you the count of queue passes done so far
- that is, the number of times the fuzzer went over all the interesting test
cases discovered so far, fuzzed them, and looped back to the very beginning.
Every fuzzing session should be allowed to complete at least one cycle; and
ideally, should run much longer than that.
As noted earlier, the first pass can take a day or longer, so sit back and
relax. If you want to get broader but more shallow coverage right away, try
the -d option - it gives you a more familiar experience by skipping the
deterministic fuzzing steps. It is, however, inferior to the standard mode in
a couple of subtle ways.
To help make the call on when to hit Ctrl-C, the cycle counter is color-coded.
It is shown in magenta during the first pass, progresses to yellow if new finds
are still being made in subsequent rounds, then blue when that ends - and
finally, turns green after the fuzzer hasn't been seeing any action for a
longer while.
The remaining fields in this part of the screen should be pretty obvious:
there's the number of test cases ("paths") discovered so far, and the number of
unique faults. The test cases, crashes, and hangs can be explored in real-time
by browsing the output directory, as discussed in the README.
3) Cycle progress
-----------------
+-------------------------------------+
| now processing : 1296 (61.86%) |
| paths timed out : 0 (0.00%) |
+-------------------------------------+
This box tells you how far along the fuzzer is with the current queue cycle: it
shows the ID of the test case it is currently working on, plus the number of
inputs it decided to ditch because they were persistently timing out.
The "*" suffix sometimes shown in the first line means that the currently
processed path is not "favored" (a property discussed later on, in section 6).
If you feel that the fuzzer is progressing too slowly, see the note about the
-d option in section 2 of this doc.
4) Map coverage
---------------
+--------------------------------------+
| map density : 10.15% / 29.07% |
| count coverage : 4.03 bits/tuple |
+--------------------------------------+
The section provides some trivia about the coverage observed by the
instrumentation embedded in the target binary.
The first line in the box tells you how many branch tuples we have already
hit, in proportion to how much the bitmap can hold. The number on the left
describes the current input; the one on the right is the value for the entire
input corpus.
Be wary of extremes:
- Absolute numbers below 200 or so suggest one of three things: that the
program is extremely simple; that it is not instrumented properly (e.g.,
due to being linked against a non-instrumented copy of the target
library); or that it is bailing out prematurely on your input test cases.
The fuzzer will try to mark this in pink, just to make you aware.
- Percentages over 70% may very rarely happen with very complex programs
that make heavy use of template-generated code.
Because high bitmap density makes it harder for the fuzzer to reliably
discern new program states, I recommend recompiling the binary with
AFL_INST_RATIO=10 or so and trying again (see env_variables.txt).
The fuzzer will flag high percentages in red. Chances are, you will never
see that unless you're fuzzing extremely hairy software (say, v8, perl,
ffmpeg).
The other line deals with the variability in tuple hit counts seen in the
binary. In essence, if every taken branch is always taken a fixed number of
times for all the inputs we have tried, this will read "1.00". As we manage
to trigger other hit counts for every branch, the needle will start to move
toward "8.00" (every bit in the 8-bit map hit), but will probably never
reach that extreme.
Together, the values can be useful for comparing the coverage of several
different fuzzing jobs that rely on the same instrumented binary.
5) Stage progress
-----------------
+-------------------------------------+
| now trying : interest 32/8 |
| stage execs : 3996/34.4k (11.62%) |
| total execs : 27.4M |
| exec speed : 891.7/sec |
+-------------------------------------+
This part gives you an in-depth peek at what the fuzzer is actually doing right
now. It tells you about the current stage, which can be any of:
- calibration - a pre-fuzzing stage where the execution path is examined
to detect anomalies, establish baseline execution speed, and so on. Executed
very briefly whenever a new find is being made.
- trim L/S - another pre-fuzzing stage where the test case is trimmed to the
shortest form that still produces the same execution path. The length (L)
and stepover (S) are chosen in general relationship to file size.
- bitflip L/S - deterministic bit flips. There are L bits toggled at any given
time, walking the input file with S-bit increments. The current L/S variants
are: 1/1, 2/1, 4/1, 8/8, 16/8, 32/8.
- arith L/8 - deterministic arithmetics. The fuzzer tries to subtract or add
small integers to 8-, 16-, and 32-bit values. The stepover is always 8 bits.
- interest L/8 - deterministic value overwrite. The fuzzer has a list of known
"interesting" 8-, 16-, and 32-bit values to try. The stepover is 8 bits.
- extras - deterministic injection of dictionary terms. This can be shown as
"user" or "auto", depending on whether the fuzzer is using a user-supplied
dictionary (-x) or an auto-created one. You will also see "over" or "insert",
depending on whether the dictionary words overwrite existing data or are
inserted by offsetting the remaining data to accommodate their length.
- havoc - a sort-of-fixed-length cycle with stacked random tweaks. The
operations attempted during this stage include bit flips, overwrites with
random and "interesting" integers, block deletion, block duplication, plus
assorted dictionary-related operations (if a dictionary is supplied in the
first place).
- splice - a last-resort strategy that kicks in after the first full queue
cycle with no new paths. It is equivalent to 'havoc', except that it first
splices together two random inputs from the queue at some arbitrarily
selected midpoint.
- sync - a stage used only when -M or -S is set (see parallel_fuzzing.txt).
No real fuzzing is involved, but the tool scans the output from other
fuzzers and imports test cases as necessary. The first time this is done,
it may take several minutes or so.
The remaining fields should be fairly self-evident: there's the exec count
progress indicator for the current stage, a global exec counter, and a
benchmark for the current program execution speed. This may fluctuate from
one test case to another, but the benchmark should be ideally over 500 execs/sec
most of the time - and if it stays below 100, the job will probably take very
long.
The fuzzer will explicitly warn you about slow targets, too. If this happens,
see the perf_tips.txt file included with the fuzzer for ideas on how to speed
things up.
6) Findings in depth
--------------------
+--------------------------------------+
| favored paths : 879 (41.96%) |
| new edges on : 423 (20.19%) |
| total crashes : 0 (0 unique) |
| total tmouts : 24 (19 unique) |
+--------------------------------------+
This gives you several metrics that are of interest mostly to complete nerds.
The section includes the number of paths that the fuzzer likes the most based
on a minimization algorithm baked into the code (these will get considerably
more air time), and the number of test cases that actually resulted in better
edge coverage (versus just pushing the branch hit counters up). There are also
additional, more detailed counters for crashes and timeouts.
Note that the timeout counter is somewhat different from the hang counter; this
one includes all test cases that exceeded the timeout, even if they did not
exceed it by a margin sufficient to be classified as hangs.
7) Fuzzing strategy yields
--------------------------
+-----------------------------------------------------+
| bit flips : 57/289k, 18/289k, 18/288k |
| byte flips : 0/36.2k, 4/35.7k, 7/34.6k |
| arithmetics : 53/2.54M, 0/537k, 0/55.2k |
| known ints : 8/322k, 12/1.32M, 10/1.70M |
| dictionary : 9/52k, 1/53k, 1/24k |
| havoc : 1903/20.0M, 0/0 |
| trim : 20.31%/9201, 17.05% |
+-----------------------------------------------------+
This is just another nerd-targeted section keeping track of how many paths we
have netted, in proportion to the number of execs attempted, for each of the
fuzzing strategies discussed earlier on. This serves to convincingly validate
assumptions about the usefulness of the various approaches taken by afl-fuzz.
The trim strategy stats in this section are a bit different than the rest.
The first number in this line shows the ratio of bytes removed from the input
files; the second one corresponds to the number of execs needed to achieve this
goal. Finally, the third number shows the proportion of bytes that, although
not possible to remove, were deemed to have no effect and were excluded from
some of the more expensive deterministic fuzzing steps.
8) Path geometry
----------------
+---------------------+
| levels : 5 |
| pending : 1570 |
| pend fav : 583 |
| own finds : 0 |
| imported : 0 |
| stability : 100.00% |
+---------------------+
The first field in this section tracks the path depth reached through the
guided fuzzing process. In essence: the initial test cases supplied by the
user are considered "level 1". The test cases that can be derived from that
through traditional fuzzing are considered "level 2"; the ones derived by
using these as inputs to subsequent fuzzing rounds are "level 3"; and so forth.
The maximum depth is therefore a rough proxy for how much value you're getting
out of the instrumentation-guided approach taken by afl-fuzz.
The next field shows you the number of inputs that have not gone through any
fuzzing yet. The same stat is also given for "favored" entries that the fuzzer
really wants to get to in this queue cycle (the non-favored entries may have to
wait a couple of cycles to get their chance).
Next, we have the number of new paths found during this fuzzing session and
imported from other fuzzer instances when doing parallelized fuzzing; and the
extent to which identical inputs appear to sometimes produce variable behavior
in the tested binary.
That last bit is actually fairly interesting: it measures the consistency of
observed traces. If a program always behaves the same for the same input data,
it will earn a score of 100%. When the value is lower but still shown in purple,
the fuzzing process is unlikely to be negatively affected. If it goes into red,
you may be in trouble, since AFL will have difficulty discerning between
meaningful and "phantom" effects of tweaking the input file.
Now, most targets will just get a 100% score, but when you see lower figures,
there are several things to look at:
- The use of uninitialized memory in conjunction with some intrinsic sources
of entropy in the tested binary. Harmless to AFL, but could be indicative
of a security bug.
- Attempts to manipulate persistent resources, such as left over temporary
files or shared memory objects. This is usually harmless, but you may want
to double-check to make sure the program isn't bailing out prematurely.
Running out of disk space, SHM handles, or other global resources can
trigger this, too.
- Hitting some functionality that is actually designed to behave randomly.
Generally harmless. For example, when fuzzing sqlite, an input like
'select random();' will trigger a variable execution path.
- Multiple threads executing at once in semi-random order. This is harmless
when the 'stability' metric stays over 90% or so, but can become an issue
if not. Here's what to try:
- Use afl-clang-fast from llvm_mode/ - it uses a thread-local tracking
model that is less prone to concurrency issues,
- See if the target can be compiled or run without threads. Common
./configure options include --without-threads, --disable-pthreads, or
--disable-openmp.
- Replace pthreads with GNU Pth (https://www.gnu.org/software/pth/), which
allows you to use a deterministic scheduler.
- In persistent mode, minor drops in the "stability" metric can be normal,
because not all the code behaves identically when re-entered; but major
dips may signify that the code within __AFL_LOOP() is not behaving
correctly on subsequent iterations (e.g., due to incomplete clean-up or
reinitialization of the state) and that most of the fuzzing effort goes
to waste.
The paths where variable behavior is detected are marked with a matching entry
in the <out_dir>/queue/.state/variable_behavior/ directory, so you can look
them up easily.
9) CPU load
-----------
[cpu: 25%]
This tiny widget shows the apparent CPU utilization on the local system. It is
calculated by taking the number of processes in the "runnable" state, and then
comparing it to the number of logical cores on the system.
If the value is shown in green, you are using fewer CPU cores than available on
your system and can probably parallelize to improve performance; for tips on
how to do that, see parallel_fuzzing.txt.
If the value is shown in red, your CPU is *possibly* oversubscribed, and
running additional fuzzers may not give you any benefits.
Of course, this benchmark is very simplistic; it tells you how many processes
are ready to run, but not how resource-hungry they may be. It also doesn't
distinguish between physical cores, logical cores, and virtualized CPUs; the
performance characteristics of each of these will differ quite a bit.
If you want a more accurate measurement, you can run the afl-gotcpu utility
from the command line.
10) Addendum: status and plot files
-----------------------------------
For unattended operation, some of the key status screen information can also be
found in a machine-readable format in the fuzzer_stats file in the output
directory. This includes:
- start_time - unix time indicating the start time of afl-fuzz
- last_update - unix time corresponding to the last update of this file
- fuzzer_pid - PID of the fuzzer process
- cycles_done - queue cycles completed so far
- execs_done - number of execve() calls attempted
- execs_per_sec - current number of execs per second
- paths_total - total number of entries in the queue
- paths_found - number of entries discovered through local fuzzing
- paths_imported - number of entries imported from other instances
- max_depth - number of levels in the generated data set
- cur_path - currently processed entry number
- pending_favs - number of favored entries still waiting to be fuzzed
- pending_total - number of all entries waiting to be fuzzed
- stability - percentage of bitmap bytes that behave consistently
- variable_paths - number of test cases showing variable behavior
- unique_crashes - number of unique crashes recorded
- unique_hangs - number of unique hangs encountered
- command_line - full command line used for the fuzzing session
- slowest_exec_ms - real time of the slowest execution in ms
- peak_rss_mb - max rss usage reached during fuzzing in mb
Most of these map directly to the UI elements discussed earlier on.
On top of that, you can also find an entry called 'plot_data', containing a
plottable history for most of these fields. If you have gnuplot installed, you
can turn this into a nice progress report with the included 'afl-plot' tool.

@ -0,0 +1,563 @@
===================================
Technical "whitepaper" for afl-fuzz
===================================
This document provides a quick overview of the guts of American Fuzzy Lop.
See README for the general instruction manual; and for a discussion of
motivations and design goals behind AFL, see historical_notes.txt.
0) Design statement
-------------------
American Fuzzy Lop does its best not to focus on any singular principle of
operation and not be a proof-of-concept for any specific theory. The tool can
be thought of as a collection of hacks that have been tested in practice,
found to be surprisingly effective, and have been implemented in the simplest,
most robust way I could think of at the time.
Many of the resulting features are made possible thanks to the availability of
lightweight instrumentation that served as a foundation for the tool, but this
mechanism should be thought of merely as a means to an end. The only true
governing principles are speed, reliability, and ease of use.
1) Coverage measurements
------------------------
The instrumentation injected into compiled programs captures branch (edge)
coverage, along with coarse branch-taken hit counts. The code injected at
branch points is essentially equivalent to:
cur_location = <COMPILE_TIME_RANDOM>;
shared_mem[cur_location ^ prev_location]++;
prev_location = cur_location >> 1;
The cur_location value is generated randomly to simplify the process of
linking complex projects and keep the XOR output distributed uniformly.
The shared_mem[] array is a 64 kB SHM region passed to the instrumented binary
by the caller. Every byte set in the output map can be thought of as a hit for
a particular (branch_src, branch_dst) tuple in the instrumented code.
The size of the map is chosen so that collisions are sporadic with almost all
of the intended targets, which usually sport between 2k and 10k discoverable
branch points:
Branch cnt | Colliding tuples | Example targets
------------+------------------+-----------------
1,000 | 0.75% | giflib, lzo
2,000 | 1.5% | zlib, tar, xz
5,000 | 3.5% | libpng, libwebp
10,000 | 7% | libxml
20,000 | 14% | sqlite
50,000 | 30% | -
At the same time, its size is small enough to allow the map to be analyzed
in a matter of microseconds on the receiving end, and to effortlessly fit
within L2 cache.
This form of coverage provides considerably more insight into the execution
path of the program than simple block coverage. In particular, it trivially
distinguishes between the following execution traces:
A -> B -> C -> D -> E (tuples: AB, BC, CD, DE)
A -> B -> D -> C -> E (tuples: AB, BD, DC, CE)
This aids the discovery of subtle fault conditions in the underlying code,
because security vulnerabilities are more often associated with unexpected
or incorrect state transitions than with merely reaching a new basic block.
The reason for the shift operation in the last line of the pseudocode shown
earlier in this section is to preserve the directionality of tuples (without
this, A ^ B would be indistinguishable from B ^ A) and to retain the identity
of tight loops (otherwise, A ^ A would be obviously equal to B ^ B).
The absence of simple saturating arithmetic opcodes on Intel CPUs means that
the hit counters can sometimes wrap around to zero. Since this is a fairly
unlikely and localized event, it's seen as an acceptable performance trade-off.
2) Detecting new behaviors
--------------------------
The fuzzer maintains a global map of tuples seen in previous executions; this
data can be rapidly compared with individual traces and updated in just a couple
of dword- or qword-wide instructions and a simple loop.
When a mutated input produces an execution trace containing new tuples, the
corresponding input file is preserved and routed for additional processing
later on (see section #3). Inputs that do not trigger new local-scale state
transitions in the execution trace (i.e., produce no new tuples) are discarded,
even if their overall control flow sequence is unique.
This approach allows for a very fine-grained and long-term exploration of
program state while not having to perform any computationally intensive and
fragile global comparisons of complex execution traces, and while avoiding the
scourge of path explosion.
To illustrate the properties of the algorithm, consider that the second trace
shown below would be considered substantially new because of the presence of
new tuples (CA, AE):
#1: A -> B -> C -> D -> E
#2: A -> B -> C -> A -> E
At the same time, with #2 processed, the following pattern will not be seen
as unique, despite having a markedly different overall execution path:
#3: A -> B -> C -> A -> B -> C -> A -> B -> C -> D -> E
In addition to detecting new tuples, the fuzzer also considers coarse tuple
hit counts. These are divided into several buckets:
1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+
To some extent, the number of buckets is an implementation artifact: it allows
an in-place mapping of an 8-bit counter generated by the instrumentation to
an 8-position bitmap relied on by the fuzzer executable to keep track of the
already-seen execution counts for each tuple.
Changes within the range of a single bucket are ignored; transition from one
bucket to another is flagged as an interesting change in program control flow,
and is routed to the evolutionary process outlined in the section below.
The hit count behavior provides a way to distinguish between potentially
interesting control flow changes, such as a block of code being executed
twice when it was normally hit only once. At the same time, it is fairly
insensitive to empirically less notable changes, such as a loop going from
47 cycles to 48. The counters also provide some degree of "accidental"
immunity against tuple collisions in dense trace maps.
The execution is policed fairly heavily through memory and execution time
limits; by default, the timeout is set at 5x the initially-calibrated
execution speed, rounded up to 20 ms. The aggressive timeouts are meant to
prevent dramatic fuzzer performance degradation by descending into tarpits
that, say, improve coverage by 1% while being 100x slower; we pragmatically
reject them and hope that the fuzzer will find a less expensive way to reach
the same code. Empirical testing strongly suggests that more generous time
limits are not worth the cost.
3) Evolving the input queue
---------------------------
Mutated test cases that produced new state transitions within the program are
added to the input queue and used as a starting point for future rounds of
fuzzing. They supplement, but do not automatically replace, existing finds.
In contrast to more greedy genetic algorithms, this approach allows the tool
to progressively explore various disjoint and possibly mutually incompatible
features of the underlying data format, as shown in this image:
http://lcamtuf.coredump.cx/afl/afl_gzip.png
Several practical examples of the results of this algorithm are discussed
here:
http://lcamtuf.blogspot.com/2014/11/pulling-jpegs-out-of-thin-air.html
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-nobody-expects-cdata-sections.html
The synthetic corpus produced by this process is essentially a compact
collection of "hmm, this does something new!" input files, and can be used to
seed any other testing processes down the line (for example, to manually
stress-test resource-intensive desktop apps).
With this approach, the queue for most targets grows to somewhere between 1k
and 10k entries; approximately 10-30% of this is attributable to the discovery
of new tuples, and the remainder is associated with changes in hit counts.
The following table compares the relative ability to discover file syntax and
explore program states when using several different approaches to guided
fuzzing. The instrumented target was GNU patch 2.7.3 compiled with -O3 and
seeded with a dummy text file; the session consisted of a single pass over the
input queue with afl-fuzz:
Fuzzer guidance | Blocks | Edges | Edge hit | Highest-coverage
strategy used | reached | reached | cnt var | test case generated
------------------+---------+---------+----------+---------------------------
(Initial file) | 156 | 163 | 1.00 | (none)
| | | |
Blind fuzzing S | 182 | 205 | 2.23 | First 2 B of RCS diff
Blind fuzzing L | 228 | 265 | 2.23 | First 4 B of -c mode diff
Block coverage | 855 | 1,130 | 1.57 | Almost-valid RCS diff
Edge coverage | 1,452 | 2,070 | 2.18 | One-chunk -c mode diff
AFL model | 1,765 | 2,597 | 4.99 | Four-chunk -c mode diff
The first entry for blind fuzzing ("S") corresponds to executing just a single
round of testing; the second set of figures ("L") shows the fuzzer running in a
loop for a number of execution cycles comparable with that of the instrumented
runs, which required more time to fully process the growing queue.
Roughly similar results have been obtained in a separate experiment where the
fuzzer was modified to compile out all the random fuzzing stages and leave just
a series of rudimentary, sequential operations such as walking bit flips.
Because this mode would be incapable of altering the size of the input file,
the sessions were seeded with a valid unified diff:
Queue extension | Blocks | Edges | Edge hit | Number of unique
strategy used | reached | reached | cnt var | crashes found
------------------+---------+---------+----------+------------------
(Initial file) | 624 | 717 | 1.00 | -
| | | |
Blind fuzzing | 1,101 | 1,409 | 1.60 | 0
Block coverage | 1,255 | 1,649 | 1.48 | 0
Edge coverage | 1,259 | 1,734 | 1.72 | 0
AFL model | 1,452 | 2,040 | 3.16 | 1
As noted earlier on, some of the prior work on genetic fuzzing relied on
maintaining a single test case and evolving it to maximize coverage. At least
in the tests described above, this "greedy" approach appears to confer no
substantial benefits over blind fuzzing strategies.
4) Culling the corpus
---------------------
The progressive state exploration approach outlined above means that some of
the test cases synthesized later on in the game may have edge coverage that
is a strict superset of the coverage provided by their ancestors.
To optimize the fuzzing effort, AFL periodically re-evaluates the queue using a
fast algorithm that selects a smaller subset of test cases that still cover
every tuple seen so far, and whose characteristics make them particularly
favorable to the tool.
The algorithm works by assigning every queue entry a score proportional to its
execution latency and file size; and then selecting lowest-scoring candidates
for each tuple.
The tuples are then processed sequentially using a simple workflow:
1) Find next tuple not yet in the temporary working set,
2) Locate the winning queue entry for this tuple,
3) Register *all* tuples present in that entry's trace in the working set,
4) Go to #1 if there are any missing tuples in the set.
The generated corpus of "favored" entries is usually 5-10x smaller than the
starting data set. Non-favored entries are not discarded, but they are skipped
with varying probabilities when encountered in the queue:
- If there are new, yet-to-be-fuzzed favorites present in the queue, 99%
of non-favored entries will be skipped to get to the favored ones.
- If there are no new favorites:
- If the current non-favored entry was fuzzed before, it will be skipped
95% of the time.
- If it hasn't gone through any fuzzing rounds yet, the odds of skipping
drop down to 75%.
Based on empirical testing, this provides a reasonable balance between queue
cycling speed and test case diversity.
Slightly more sophisticated but much slower culling can be performed on input
or output corpora with afl-cmin. This tool permanently discards the redundant
entries and produces a smaller corpus suitable for use with afl-fuzz or
external tools.
5) Trimming input files
-----------------------
File size has a dramatic impact on fuzzing performance, both because large
files make the target binary slower, and because they reduce the likelihood
that a mutation would touch important format control structures, rather than
redundant data blocks. This is discussed in more detail in perf_tips.txt.
The possibility that the user will provide a low-quality starting corpus aside,
some types of mutations can have the effect of iteratively increasing the size
of the generated files, so it is important to counter this trend.
Luckily, the instrumentation feedback provides a simple way to automatically
trim down input files while ensuring that the changes made to the files have no
impact on the execution path.
The built-in trimmer in afl-fuzz attempts to sequentially remove blocks of data
with variable length and stepover; any deletion that doesn't affect the checksum
of the trace map is committed to disk. The trimmer is not designed to be
particularly thorough; instead, it tries to strike a balance between precision
and the number of execve() calls spent on the process, selecting the block size
and stepover to match. The average per-file gains are around 5-20%.
The standalone afl-tmin tool uses a more exhaustive, iterative algorithm, and
also attempts to perform alphabet normalization on the trimmed files. The
operation of afl-tmin is as follows.
First, the tool automatically selects the operating mode. If the initial input
crashes the target binary, afl-tmin will run in non-instrumented mode, simply
keeping any tweaks that produce a simpler file but still crash the target. If
the target is non-crashing, the tool uses an instrumented mode and keeps only
the tweaks that produce exactly the same execution path.
The actual minimization algorithm is:
1) Attempt to zero large blocks of data with large stepovers. Empirically,
this is shown to reduce the number of execs by preempting finer-grained
efforts later on.
2) Perform a block deletion pass with decreasing block sizes and stepovers,
binary-search-style.
3) Perform alphabet normalization by counting unique characters and trying
to bulk-replace each with a zero value.
4) As a last resort, perform byte-by-byte normalization on non-zero bytes.
Instead of zeroing with a 0x00 byte, afl-tmin uses the ASCII digit '0'. This
is done because such a modification is much less likely to interfere with
text parsing, so it is more likely to result in successful minimization of
text files.
The algorithm used here is less involved than some other test case
minimization approaches proposed in academic work, but requires far fewer
executions and tends to produce comparable results in most real-world
applications.
6) Fuzzing strategies
---------------------
The feedback provided by the instrumentation makes it easy to understand the
value of various fuzzing strategies and optimize their parameters so that they
work equally well across a wide range of file types. The strategies used by
afl-fuzz are generally format-agnostic and are discussed in more detail here:
http://lcamtuf.blogspot.com/2014/08/binary-fuzzing-strategies-what-works.html
It is somewhat notable that especially early on, most of the work done by
afl-fuzz is actually highly deterministic, and progresses to random stacked
modifications and test case splicing only at a later stage. The deterministic
strategies include:
- Sequential bit flips with varying lengths and stepovers,
- Sequential addition and subtraction of small integers,
- Sequential insertion of known interesting integers (0, 1, INT_MAX, etc).
The purpose of opening with deterministic steps is related to their tendency to
produce compact test cases and small diffs between the non-crashing and crashing
inputs.
With deterministic fuzzing out of the way, the non-deterministic steps include
stacked bit flips, insertions, deletions, arithmetics, and splicing of different
test cases.
The relative yields and execve() costs of all these strategies have been
investigated and are discussed in the aforementioned blog post.
For the reasons discussed in historical_notes.txt (chiefly, performance,
simplicity, and reliability), AFL generally does not try to reason about the
relationship between specific mutations and program states; the fuzzing steps
are nominally blind, and are guided only by the evolutionary design of the
input queue.
That said, there is one (trivial) exception to this rule: when a new queue
entry goes through the initial set of deterministic fuzzing steps, and tweaks to
some regions in the file are observed to have no effect on the checksum of the
execution path, they may be excluded from the remaining phases of
deterministic fuzzing - and the fuzzer may proceed straight to random tweaks.
Especially for verbose, human-readable data formats, this can reduce the number
of execs by 10-40% or so without an appreciable drop in coverage. In extreme
cases, such as normally block-aligned tar archives, the gains can be as high as
90%.
Because the underlying "effector maps" are local to every queue entry and remain
in force only during deterministic stages that do not alter the size or the
general layout of the underlying file, this mechanism appears to work very
reliably and proved to be simple to implement.
7) Dictionaries
---------------
The feedback provided by the instrumentation makes it easy to automatically
identify syntax tokens in some types of input files, and to detect that certain
combinations of predefined or auto-detected dictionary terms constitute a
valid grammar for the tested parser.
A discussion of how these features are implemented within afl-fuzz can be found
here:
http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html
In essence, when basic, typically easily-obtained syntax tokens are combined
together in a purely random manner, the instrumentation and the evolutionary
design of the queue together provide a feedback mechanism to differentiate
between meaningless mutations and ones that trigger new behaviors in the
instrumented code - and to incrementally build more complex syntax on top of
this discovery.
The dictionaries have been shown to enable the fuzzer to rapidly reconstruct
the grammar of highly verbose and complex languages such as JavaScript, SQL,
or XML; several examples of generated SQL statements are given in the blog
post mentioned above.
Interestingly, the AFL instrumentation also allows the fuzzer to automatically
isolate syntax tokens already present in an input file. It can do so by looking
for runs of bytes that, when flipped, produce a consistent change to the
program's execution path; this is suggestive of an underlying atomic comparison
to a predefined value baked into the code. The fuzzer relies on this signal
to build compact "auto dictionaries" that are then used in conjunction with
other fuzzing strategies.
8) De-duping crashes
--------------------
De-duplication of crashes is one of the more important problems for any
competent fuzzing tool. Many of the naive approaches run into problems; in
particular, looking just at the faulting address may lead to completely
unrelated issues being clustered together if the fault happens in a common
library function (say, strcmp, strcpy); while checksumming call stack
backtraces can lead to extreme crash count inflation if the fault can be
reached through a number of different, possibly recursive code paths.
The solution implemented in afl-fuzz considers a crash unique if either of two
conditions is met:
- The crash trace includes a tuple not seen in any of the previous crashes,
- The crash trace is missing a tuple that was always present in earlier
faults.
The approach is vulnerable to some path count inflation early on, but exhibits
a very strong self-limiting effect, similar to the execution path analysis
logic that is the cornerstone of afl-fuzz.
9) Investigating crashes
------------------------
The exploitability of many types of crashes can be ambiguous; afl-fuzz tries
to address this by providing a crash exploration mode where a known-faulting
test case is fuzzed in a manner very similar to the normal operation of the
fuzzer, but with a constraint that causes any non-crashing mutations to be
thrown away.
A detailed discussion of the value of this approach can be found here:
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-crash-exploration-mode.html
The method uses instrumentation feedback to explore the state of the crashing
program to get past the ambiguous faulting condition and then isolate the
newly-found inputs for human review.
On the subject of crashes, it is worth noting that in contrast to normal
queue entries, crashing inputs are *not* trimmed; they are kept exactly as
discovered to make it easier to compare them to the parent, non-crashing entry
in the queue. That said, afl-tmin can be used to shrink them at will.
10) The fork server
-------------------
To improve performance, afl-fuzz uses a "fork server", where the fuzzed process
goes through execve(), linking, and libc initialization only once, and is then
cloned from a stopped process image by leveraging copy-on-write. The
implementation is described in more detail here:
http://lcamtuf.blogspot.com/2014/10/fuzzing-binaries-without-execve.html
The fork server is an integral aspect of the injected instrumentation and
simply stops at the first instrumented function to await commands from
afl-fuzz.
With fast targets, the fork server can offer considerable performance gains,
usually between 1.5x and 2x. It is also possible to:
- Use the fork server in manual ("deferred") mode, skipping over larger,
user-selected chunks of initialization code. It requires very modest
code changes to the targeted program, and with some targets, can
produce 10x+ performance gains.
- Enable "persistent" mode, where a single process is used to try out
multiple inputs, greatly limiting the overhead of repetitive fork()
calls. This generally requires some code changes to the targeted program,
but can improve the performance of fast targets by a factor of 5 or more
- approximating the benefits of in-process fuzzing jobs while still
maintaining very robust isolation between the fuzzer process and the
targeted binary.
11) Parallelization
-------------------
The parallelization mechanism relies on periodically examining the queues
produced by independently-running instances on other CPU cores or on remote
machines, and then selectively pulling in the test cases that, when tried
out locally, produce behaviors not yet seen by the fuzzer at hand.
This allows for extreme flexibility in fuzzer setup, including running synced
instances against different parsers of a common data format, often with
synergistic effects.
For more information about this design, see parallel_fuzzing.txt.
12) Binary-only instrumentation
-------------------------------
Instrumentation of black-box, binary-only targets is accomplished with the
help of a separately-built version of QEMU in "user emulation" mode. This also
allows the execution of cross-architecture code - say, ARM binaries on x86.
QEMU uses basic blocks as translation units; the instrumentation is implemented
on top of this and uses a model roughly analogous to the compile-time hooks:
if (block_address > elf_text_start && block_address < elf_text_end) {
cur_location = (block_address >> 4) ^ (block_address << 8);
shared_mem[cur_location ^ prev_location]++;
prev_location = cur_location >> 1;
}
The shift-and-XOR-based scrambling in the second line is used to mask the
effects of instruction alignment.
The start-up of binary translators such as QEMU, DynamoRIO, and PIN is fairly
slow; to counter this, the QEMU mode leverages a fork server similar to that
used for compiler-instrumented code, effectively spawning copies of an
already-initialized process paused at _start.
First-time translation of a new basic block also incurs substantial latency. To
eliminate this problem, the AFL fork server is extended by providing a channel
between the running emulator and the parent process. The channel is used
to notify the parent about the addresses of any newly-encountered blocks and to
add them to the translation cache that will be replicated for future child
processes.
As a result of these two optimizations, the overhead of the QEMU mode is
roughly 2-5x, compared to 100x+ for PIN.
13) The afl-analyze tool
------------------------
The file format analyzer is a simple extension of the minimization algorithm
discussed earlier on; instead of attempting to remove no-op blocks, the tool
performs a series of walking byte flips and then annotates runs of bytes
in the input file.
It uses the following classification scheme:
- "No-op blocks" - segments where bit flips cause no apparent changes to
control flow. Common examples may be comment sections, pixel data within
a bitmap file, etc.
- "Superficial content" - segments where some, but not all, bitflips
produce some control flow changes. Examples may include strings in rich
documents (e.g., XML, RTF).
- "Critical stream" - a sequence of bytes where all bit flips alter control
flow in different but correlated ways. This may be compressed data,
non-atomically compared keywords or magic values, etc.
- "Suspected length field" - small, atomic integer that, when touched in
any way, causes a consistent change to program control flow, suggestive
of a failed length check.
- "Suspected cksum or magic int" - an integer that behaves similarly to a
length field, but has a numerical value that makes the length explanation
unlikely. This is suggestive of a checksum or other "magic" integer.
- "Suspected checksummed block" - a long block of data where any change
always triggers the same new execution path. Likely caused by failing
a checksum or a similar integrity check before any subsequent parsing
takes place.
- "Magic value section" - a generic token where changes cause the type
of binary behavior outlined earlier, but that doesn't meet any of the
other criteria. May be an atomically compared keyword or so.

Binary file not shown.

After

Width:  |  Height:  |  Size: 581 KiB

@ -0,0 +1 @@
() { _; } >_[$($())] { id; }

@ -0,0 +1 @@
() { x() { _; }; x() { _; } <<a; }

Binary file not shown.

After

Width:  |  Height:  |  Size: 892 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 179 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 642 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 595 B

@ -0,0 +1,3 @@
<!DOCTYPEd[<!ENTITY
S ""><!ENTITY %
N "<!ELEMENT<![INCLUDE0"<!ENTITYL%N;

Binary file not shown.

After

Width:  |  Height:  |  Size: 876 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 293 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 434 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 996 B

@ -0,0 +1,2 @@
create table t0(o CHar(0)CHECK(0&O>O));insert into t0
select randomblob(0)-trim(0);

@ -0,0 +1 @@
SELECT 0 UNION SELECT 0 ORDER BY 1 COLLATE"""""""";

@ -0,0 +1 @@
PRAGMA foreign_keys=1;CREATE TABLE t1("""0"PRIMARY KEy REFERENCES t1 ON DELETE SET NULL);REPLACE INTO t1 SELECT(0);

@ -0,0 +1,2 @@
DROP TABLE IF EXISTS t;CREATE VIRTUAL TABLE t0 USING fts4();insert into t0 select zeroblob(0);SAVEPOINT O;insert into t0
select(0);SAVEPOINT E;insert into t0 SELECT 0 UNION SELECT 0'x'ORDER BY x;

File diff suppressed because one or more lines are too long

@ -0,0 +1 @@
SELECT*from(select"",zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(150000000),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0)),(select"",zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),(0),zeroblob(150000000),(0),zeroblob(0),(0)EXCEPT select zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0));

@ -0,0 +1,2 @@
create table t0(t);insert into t0
select strftime();

@ -0,0 +1 @@
SELECT fts3_tokenizer(@0());

@ -0,0 +1 @@
select''like''like''like#0;

@ -0,0 +1 @@
PRAGMA e;select lower(0);select lower(0)"a",""GROUP BY a ORDER BY a;

@ -0,0 +1 @@
WITH x AS(SELECT*FROM t)SELECT""EXCEPT SELECT 0 ORDER BY 0 COLLATE"";

@ -0,0 +1 @@
CREATE VIRTUAL TABLE x USING fts4();VALUES(0,0),(0,0),(0,0),(0,0);PRAGMA writable_schema=ON;UPDATE sqlite_master SET sql=''WHERE name='';UPDATE sqlite_master SET sql='CREATE table t(d CHECK(T(#0)';SAVEPOINT K;SAVEPOINT T;SAVEPOINT T;ANALYZE;ROLLBACK;SAVEPOINT E;DROP TABLE IF EXISTS t;

@ -0,0 +1 @@
CREATE VIRTUAL TABLE t4 USING fts4(0,b,c,notindexed=0);INSERT INTO t4 VALUES('','','0');BEGIN;INSERT INTO t4 VALUES('','','0');INSERT INTO t4(t4)VALUES('integrity-check');

@ -0,0 +1 @@
DETACH(select group_concat(q));

@ -0,0 +1 @@
select(select strftime());

@ -0,0 +1 @@
select e.*,0 from(s,(L))e;

@ -0,0 +1 @@
PRAGMA encoding='UTF16';CREATE VIRTUAL TABLE È USING s;

@ -0,0 +1 @@
CREATE VIRTUAL TABLE t USING fts4(tokenize=);

@ -0,0 +1 @@
CREATE TABLE p(a UNIQUE,PRIMARY KEY('a'))WITHOUT rowid;

@ -0,0 +1 @@
CREATE TABLE t0(z);WITH d(x)AS(SELECT*UNION SELECT 0)INSERT INTO t0 SELECT 0 FROM d;

@ -0,0 +1 @@
create table t0( DEFAULT(0=0)NOT/**/NULL);REPLACE into t0 select'';

@ -0,0 +1,6 @@
CREATE VIRTUAL TABLE t0 USING fts4(x,order=DESC);
INSERT INTO t0(docid,x)VALUES(-1E0,'0(o');
INSERT INTO t0 VALUES('');
INSERT INTO t0 VALUES('');
INSeRT INTO t0 VALUES('o');
SELECT docid FROM t0 WHERE t0 MATCH'"0*o"';

@ -0,0 +1 @@
SELECT printf('%*.*f',90000||006000000&6600000000,00000000000000000909000000000000.0000000000000000)""WHERE"">"";

@ -0,0 +1 @@
CREATE VIRTUAL TABLE t0 USING fts4(content=t0);

@ -0,0 +1 @@
REATE VIRTUAL TABLE t0 USING fts4(prefix=0);INSERT INTO t0 VALUES(0);

@ -0,0 +1 @@
create table t(s);PRAGMA writable_schema=ON;UPDATE sqlite_master SET sql='ANALYZE;CREATE VIRTUAL TABLE t USING fts3;DROP TABLE t;DROP TABLE EXISTS t';PRAGMA r;SAVEPOINT T;ANALYZE;ROLLBACK;SAVEPOINT E;DROP TABLE IF EXISTS t;

@ -0,0 +1,3 @@
$$@$$$@$o
S…Ôo
S…Ô

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save