@@ -0,0 +1,20 @@
# Binaries produced by "make".
afl-analyze
afl-as
afl-clang
afl-clang++
afl-fuzz
afl-g++
afl-gcc
afl-gotcpu
afl-showmap
afl-tmin
as

# Binaries produced by "make -C llvm_mode".
afl-clang-fast
afl-clang-fast++
afl-llvm-pass.so
afl-llvm-rt-32.o
afl-llvm-rt-64.o
afl-llvm-rt.o
@@ -0,0 +1,60 @@
language: c

env:
  - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_STOP_MANUALLY=1
  - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1
  # TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes
  - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1

before_install:
  - sudo apt update
  - sudo apt install -y libtool libtool-bin automake bison libglib2.0

# TODO: Look into splitting off some builds using a build matrix.
# TODO: Move this all into a bash script so we don't need to write bash in yaml.
script:
  - make
  - ./afl-gcc ./test-instr.c -o test-instr-gcc
  - mkdir seeds
  - echo "" > seeds/nil_seed
  - if [ -z "$AFL_STOP_MANUALLY" ];
    then ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
    else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
    fi
  - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
  - rm -r out/*
  - ./afl-clang ./test-instr.c -o test-instr-clang
  - if [ -z "$AFL_STOP_MANUALLY" ];
    then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
    else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
    fi
  - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 2
  - make clean
  - CC=clang CXX=clang++ make
  - cd llvm_mode
  # TODO: Build with different versions of clang/LLVM since LLVM passes don't
  # have a stable API.
  - CC=clang CXX=clang++ LLVM_CONFIG=llvm-config make
  - cd ..
  - rm -r out/*
  - ./afl-clang-fast ./test-instr.c -o test-instr-clang-fast
  - if [ -z "$AFL_STOP_MANUALLY" ];
    then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
    else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
    fi
  - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
  # Test fuzzing libFuzzer targets and trace-pc-guard instrumentation.
  - clang -g -fsanitize-coverage=trace-pc-guard ./test-libfuzzer-target.c -c
  - clang -c -w llvm_mode/afl-llvm-rt.o.c
  - wget https://raw.githubusercontent.com/llvm/llvm-project/main/compiler-rt/lib/fuzzer/afl/afl_driver.cpp
  - clang++ afl_driver.cpp afl-llvm-rt.o.o test-libfuzzer-target.o -o test-libfuzzer-target
  - timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-libfuzzer-target
  - cd qemu_mode
  - ./build_qemu_support.sh
  - cd ..
  - gcc ./test-instr.c -o test-no-instr
  - if [ -z "$AFL_STOP_MANUALLY" ];
    then ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
    else timeout --preserve-status 5s ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
    fi
  - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 12 -p 9
@@ -1,493 +0,0 @@
# american fuzzy lop

[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL)

Originally developed by Michal Zalewski <lcamtuf@google.com>.

See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read
this file.

## 1) Challenges of guided fuzzing

Fuzzing is one of the most powerful and proven strategies for identifying
security issues in real-world software; it is responsible for the vast
majority of remote code execution and privilege escalation bugs found to date
in security-critical software.

Unfortunately, fuzzing is also relatively shallow; blind, random mutations
make it very unlikely to reach certain code paths in the tested code, leaving
some vulnerabilities firmly outside the reach of this technique.

There have been numerous attempts to solve this problem. One of the early
approaches - pioneered by Tavis Ormandy - is corpus distillation. The method
relies on coverage signals to select a subset of interesting seeds from a
massive, high-quality corpus of candidate files, and then fuzz them by
traditional means. The approach works exceptionally well, but requires such
a corpus to be readily available. In addition, block coverage measurements
provide only a very simplistic understanding of program state, and are less
useful for guiding the fuzzing effort in the long haul.

Other, more sophisticated research has focused on techniques such as program
flow analysis ("concolic execution"), symbolic execution, or static analysis.
All these methods are extremely promising in experimental settings, but tend
to suffer from reliability and performance problems in practical uses - and
currently do not offer a viable alternative to "dumb" fuzzing techniques.

## 2) The afl-fuzz approach

American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
but rock-solid instrumentation-guided genetic algorithm. It uses a modified
form of edge coverage to effortlessly pick up subtle, local-scale changes to
program control flow.

Simplifying a bit, the overall algorithm can be summed up as:

  1) Load user-supplied initial test cases into the queue,

  2) Take next input file from the queue,

  3) Attempt to trim the test case to the smallest size that doesn't alter
     the measured behavior of the program,

  4) Repeatedly mutate the file using a balanced and well-researched variety
     of traditional fuzzing strategies,

  5) If any of the generated mutations resulted in a new state transition
     recorded by the instrumentation, add mutated output as a new entry in the
     queue.

  6) Go to 2.

The discovered test cases are also periodically culled to eliminate ones that
have been obsoleted by newer, higher-coverage finds; and undergo several other
instrumentation-driven effort minimization steps.

As a side result of the fuzzing process, the tool creates a small,
self-contained corpus of interesting test cases. These are extremely useful
for seeding other, labor- or resource-intensive testing regimes - for example,
for stress-testing browsers, office applications, graphics suites, or
closed-source tools.

The fuzzer is thoroughly tested to deliver out-of-the-box performance far
superior to blind fuzzing or coverage-only tools.

## 3) Instrumenting programs for use with AFL

When source code is available, instrumentation can be injected by a companion
tool that works as a drop-in replacement for gcc or clang in any standard build
process for third-party code.

The instrumentation has a fairly modest performance impact; in conjunction with
other optimizations implemented by afl-fuzz, most programs can be fuzzed as fast
or even faster than possible with traditional tools.

The correct way to recompile the target program may vary depending on the
specifics of the build process, but a nearly-universal approach would be:

```shell
$ CC=/path/to/afl/afl-gcc ./configure
$ make clean all
```

For C++ programs, you'd also want to set `CXX=/path/to/afl/afl-g++`.

The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
clang users may also opt to leverage a higher-performance instrumentation mode,
as described in llvm_mode/README.llvm.

When testing libraries, you need to find or write a simple program that reads
data from stdin or from a file and passes it to the tested library. In such a
case, it is essential to link this executable against a static version of the
instrumented library, or to make sure that the correct .so file is loaded at
runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
build, usually possible via:

```shell
$ CC=/path/to/afl/afl-gcc ./configure --disable-shared
```

Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
automatically enable code hardening options that make it easier to detect
simple memory bugs. Libdislocator, a helper library included with AFL (see
libdislocator/README.dislocator) can help uncover heap corruption issues, too.
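
For example, a hardened instrumented build could look like this (just a sketch;
the AFL path is a placeholder):

```shell
$ CC=/path/to/afl/afl-gcc ./configure
$ AFL_HARDEN=1 make clean all
```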

PS. ASAN users are advised to review the [notes_for_asan.txt](docs/notes_for_asan.txt) file for important
caveats.

## 4) Instrumenting binary-only apps

When source code is *NOT* available, the fuzzer offers experimental support for
fast, on-the-fly instrumentation of black-box binaries. This is accomplished
with a version of QEMU running in the lesser-known "user space emulation" mode.

QEMU is a project separate from AFL, but you can conveniently build the
feature by doing:

```shell
$ cd qemu_mode
$ ./build_qemu_support.sh
```

For additional instructions and caveats, see qemu_mode/README.qemu.

The mode is approximately 2-5x slower than compile-time instrumentation, is
less conducive to parallelization, and may have some other quirks.

## 5) Choosing initial test cases

To operate correctly, the fuzzer requires one or more starting files that
contain a good example of the input data normally expected by the targeted
application. There are two basic rules:

  - Keep the files small. Under 1 kB is ideal, although not strictly necessary.
    For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt).

  - Use multiple test cases only if they are functionally different from
    each other. There is no point in using fifty different vacation photos
    to fuzz an image library.

You can find many good examples of starting files in the testcases/ subdirectory
that comes with this tool.

PS. If a large corpus of data is available for screening, you may want to use
the afl-cmin utility to identify a subset of functionally distinct files that
exercise different code paths in the target binary.
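
A typical distillation pass could look roughly like this (a sketch; directory
names are placeholders):

```shell
$ ./afl-cmin -i full_corpus_dir -o distilled_corpus_dir -- /path/to/program @@
```

The resulting directory can then be used as the -i input for afl-fuzz.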

## 6) Fuzzing binaries

The fuzzing process itself is carried out by the afl-fuzz utility. This program
requires a read-only directory with initial test cases, a separate place to
store its findings, plus a path to the binary to test.

For target binaries that accept input directly from stdin, the usual syntax is:

```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
```

For programs that take input from a file, use '@@' to mark the location in
the target's command line where the input file name should be placed. The
fuzzer will substitute this for you:

```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
```

You can also use the -f option to have the mutated data written to a specific
file. This is useful if the program expects input with a particular file extension.
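
For instance, if the target insists on a .jpg suffix, something along these lines
should work (a sketch; file and directory names are placeholders, and the same
path is passed to the target so it reads what the fuzzer writes):

```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir -f /tmp/fuzz_input.jpg /path/to/program /tmp/fuzz_input.jpg
```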

Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command
line) or in a traditional, blind-fuzzer mode (specify -n).
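
In other words (a sketch, reusing the placeholder directory names from above):

```shell
$ ./afl-fuzz -Q -i testcase_dir -o findings_dir /path/to/binary @@   # QEMU mode
$ ./afl-fuzz -n -i testcase_dir -o findings_dir /path/to/binary @@   # dumb mode
```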

You can use -t and -m to override the default timeout and memory limit for the
executed process; rare examples of targets that may need these settings touched
include compilers and video decoders.

Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt).

Note that afl-fuzz starts by performing an array of deterministic fuzzing
steps, which can take several days, but tend to produce neat test cases. If you
want quick & dirty results right away - akin to zzuf and other traditional
fuzzers - add the -d option to the command line.

## 7) Interpreting output

See the [status_screen.txt](docs/status_screen.txt) file for information on
how to interpret the displayed stats and monitor the health of the process.
Be sure to consult this file especially if any UI elements are highlighted in
red.

The fuzzing process will continue until you press Ctrl-C. At minimum, you want
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
couple of hours to a week or so.

There are three subdirectories created within the output directory and updated
in real time:

  - queue/   - test cases for every distinctive execution path, plus all the
               starting files given by the user. This is the synthesized corpus
               mentioned in section 2.
               Before using this corpus for any other purposes, you can shrink
               it to a smaller size using the afl-cmin tool. The tool will find
               a smaller subset of files offering equivalent edge coverage.

  - crashes/ - unique test cases that cause the tested program to receive a
               fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are
               grouped by the received signal.

  - hangs/   - unique test cases that cause the tested program to time out. The
               default time limit before something is classified as a hang is
               the larger of 1 second and the value of the -t parameter.
               The value can be fine-tuned by setting AFL_HANG_TMOUT, but this
               is rarely necessary.

Crashes and hangs are considered "unique" if the associated execution paths
involve any state transitions not seen in previously-recorded faults. If a
single bug can be reached in multiple ways, there will be some count inflation
early in the process, but this should quickly taper off.

The file names for crashes and hangs are correlated with parent, non-faulting
queue entries. This should help with debugging.

When you can't reproduce a crash found by afl-fuzz, the most likely cause is
that you are not setting the same memory limit as used by the tool. Try:

```shell
$ LIMIT_MB=50
$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
```

Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
also change -Sv to -Sd.

Any existing output directory can also be used to resume aborted jobs; try:

```shell
$ ./afl-fuzz -i- -o existing_output_dir [...etc...]
```

If you have gnuplot installed, you can also generate some pretty graphs for any
active fuzzing task using afl-plot. For an example of what this looks like,
see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).

## 8) Parallelized fuzzing

Every instance of afl-fuzz takes up roughly one core. This means that on
multi-core systems, parallelization is necessary to fully utilize the hardware.
For tips on how to fuzz a common target on multiple cores or multiple networked
machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt).
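
As described there, a typical local setup runs one master (-M) instance and one
or more secondary (-S) instances that share a sync directory - roughly (a sketch,
one command per core; names are placeholders):

```shell
$ ./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 /path/to/program @@
$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 /path/to/program @@
```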

The parallel fuzzing mode also offers a simple way for interfacing AFL to other
fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.

## 9) Fuzzer dictionaries

By default, the afl-fuzz mutation engine is optimized for compact data formats -
say, images, multimedia, compressed data, regular expression syntax, or shell
scripts. It is somewhat less suited for languages with particularly verbose and
redundant verbiage - notably including HTML, SQL, or JavaScript.

To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to
seed the fuzzing process with an optional dictionary of language keywords,
magic headers, or other special tokens associated with the targeted data type
-- and use that to reconstruct the underlying grammar on the go:

[http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html)

To use this feature, you first need to create a dictionary in one of the two
formats discussed in dictionaries/README.dictionaries; and then point the fuzzer
to it via the -x option in the command line.
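
For example, when fuzzing an SQL parser, you might do something like (a sketch;
the target path is a placeholder):

```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir -x dictionaries/sql.dict /path/to/program @@
```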

(Several common dictionaries are already provided in that subdirectory, too.)

There is no way to provide more structured descriptions of the underlying
syntax, but the fuzzer will likely figure out some of this based on the
instrumentation feedback alone. This actually works in practice - see, for example:

[http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html)

PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
existing syntax tokens in the input corpus by watching the instrumentation
very closely during deterministic byte flips. This works for some types of
parsers and grammars, but isn't nearly as good as the -x mode.

If a dictionary is really hard to come by, another option is to let AFL run
for a while, and then use the token capture library that comes as a companion
utility with AFL. For that, see libtokencap/README.tokencap.
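
Roughly speaking, the library is preloaded into the target while replaying the
queue, and the captured tokens become a new dictionary - something like the
following sketch (consult the README above for the authoritative steps; all
paths here are placeholders):

```shell
$ export AFL_TOKEN_FILE=$PWD/temp_output.txt
$ for i in out_dir/queue/id*; do
    LD_PRELOAD=/path/to/libtokencap.so /path/to/program <"$i"
  done
$ sort -u temp_output.txt > new_dict.txt
```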

## 10) Crash triage

The coverage-based grouping of crashes usually produces a small data set that
can be quickly triaged manually or with a very simple GDB or Valgrind script.
Every crash is also traceable to its parent non-crashing test case in the
queue, making it easier to diagnose faults.

Having said that, it's important to acknowledge that some fuzzing crashes can be
difficult to quickly evaluate for exploitability without a lot of debugging and
code analysis work. To assist with this task, afl-fuzz supports a unique
"crash exploration" mode enabled with the -C flag.

In this mode, the fuzzer takes one or more crashing test cases as the input,
and uses its feedback-driven fuzzing strategies to very quickly enumerate all
code paths that can be reached in the program while keeping it in the
crashing state.
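
For example (a sketch; directory names are placeholders):

```shell
$ ./afl-fuzz -C -i dir_with_crashing_cases -o exploration_dir /path/to/program @@
```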

Mutations that do not result in a crash are rejected; so are any changes that
do not affect the execution path.

The output is a small corpus of files that can be very rapidly examined to see
what degree of control the attacker has over the faulting address, or whether
it is possible to get past an initial out-of-bounds read - and see what lies
beneath.

Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
can be operated in a very simple way:

```shell
$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
```

The tool works with crashing and non-crashing test cases alike. In the crash
mode, it will happily accept instrumented and non-instrumented binaries. In the
non-crashing mode, the minimizer relies on standard AFL instrumentation to make
the file simpler without altering the execution path.

The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
afl-fuzz.

Another recent addition to AFL is the afl-analyze tool. It takes an input
file, attempts to sequentially flip bytes, and observes the behavior of the
tested program. It then color-codes the input based on which sections appear to
be critical, and which are not; while not bulletproof, it can often offer quick
insights into complex file formats. More info about its operation can be found
near the end of [technical_details.txt](docs/technical_details.txt).
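
Its invocation mirrors that of afl-tmin (a sketch; the file and target names
are placeholders):

```shell
$ ./afl-analyze -i interesting_test_case -- /path/to/program [...]
```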

## 11) Going beyond crashes

Fuzzing is a wonderful and underutilized technique for discovering non-crashing
design and implementation errors, too. Quite a few interesting bugs have been
found by modifying the target programs to call abort() when, say:

  - Two bignum libraries produce different outputs when given the same
    fuzzer-generated input,

  - An image library produces different outputs when asked to decode the same
    input image several times in a row,

  - A serialization / deserialization library fails to produce stable outputs
    when iteratively serializing and deserializing fuzzer-supplied data,

  - A compression library produces an output inconsistent with the input file
    when asked to compress and then decompress a particular blob.

Implementing these or similar sanity checks usually takes very little time;
if you are the maintainer of a particular package, you can make this code
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
shared with libFuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).

## 12) Common-sense risks

Please keep in mind that, similarly to many other computationally-intensive
tasks, fuzzing may put strain on your hardware and on the OS. In particular:

  - Your CPU will run hot and will need adequate cooling. In most cases, if
    cooling is insufficient or stops working properly, CPU speeds will be
    automatically throttled. That said, especially when fuzzing on less
    suitable hardware (laptops, smartphones, etc), it's not entirely impossible
    for something to blow up.

  - Targeted programs may end up erratically grabbing gigabytes of memory or
    filling up disk space with junk files. AFL tries to enforce basic memory
    limits, but can't prevent each and every possible mishap. The bottom line
    is that you shouldn't be fuzzing on systems where the prospect of data loss
    is not an acceptable risk.

  - Fuzzing involves billions of reads and writes to the filesystem. On modern
    systems, this will be usually heavily cached, resulting in fairly modest
    "physical" I/O - but there are many factors that may alter this equation.
    It is your responsibility to monitor for potential trouble; with very heavy
    I/O, the lifespan of many HDDs and SSDs may be reduced.

    A good way to monitor disk I/O on Linux is the 'iostat' command:

    ```shell
    $ iostat -d 3 -x -k [...optional disk ID...]
    ```

## 13) Known limitations & areas for improvement

Here are some of the most important caveats for AFL:

  - AFL detects faults by checking for the first spawned process dying due to
    a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
    these signals may need to have the relevant code commented out. In the same
    vein, faults in child processes spawned by the fuzzed target may evade
    detection unless you manually add some code to catch that.

  - As with any other brute-force tool, the fuzzer offers limited coverage if
    encryption, checksums, cryptographic signatures, or compression are used to
    wholly wrap the actual data format to be tested.

    To work around this, you can comment out the relevant checks (see
    experimental/libpng_no_checksum/ for inspiration); if this is not possible,
    you can also write a postprocessor, as explained in
    experimental/post_library/.

  - There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
    isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt)
    for tips.

  - There is no direct support for fuzzing network services, background
    daemons, or interactive apps that require UI interaction to work. You may
    need to make simple code changes to make them behave in a more traditional
    way. Preeny may offer a relatively simple option, too - see:
    https://github.com/zardus/preeny

    Some useful tips for modifying network-based services can also be found at:
    https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop

  - AFL doesn't output human-readable coverage data. If you want to monitor
    coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov

  - Occasionally, sentient machines rise against their creators. If this
    happens to you, please consult http://lcamtuf.coredump.cx/prep/.

Beyond this, see INSTALL for platform-specific tips.

## 14) Special thanks

Many of the improvements to afl-fuzz wouldn't be possible without feedback,
bug reports, or patches from:

```
  Jann Horn                             Hanno Boeck
  Felix Groebert                        Jakub Wilk
  Richard W. M. Jones                   Alexander Cherepanov
  Tom Ritter                            Hovik Manucharyan
  Sebastian Roschke                     Eberhard Mattes
  Padraig Brady                         Ben Laurie
  @dronesec                             Luca Barbato
  Tobias Ospelt                         Thomas Jarosch
  Martin Carpenter                      Mudge Zatko
  Joe Zbiciak                           Ryan Govostes
  Michael Rash                          William Robinet
  Jonathan Gray                         Filipe Cabecinhas
  Nico Weber                            Jodie Cunningham
  Andrew Griffiths                      Parker Thompson
  Jonathan Neuschäfer                   Tyler Nighswander
  Ben Nagy                              Samir Aguiar
  Aidan Thornton                        Aleksandar Nikolich
  Sam Hakim                             Laszlo Szekeres
  David A. Wheeler                      Turo Lamminen
  Andreas Stieger                       Richard Godbee
  Louis Dassy                           teor2345
  Alex Moneger                          Dmitry Vyukov
  Keegan McAllister                     Kostya Serebryany
  Richo Healey                          Martijn Bogaard
  rc0r                                  Jonathan Foote
  Christian Holler                      Dominique Pelle
  Jacek Wielemborek                     Leo Barnes
  Jeremy Barnes                         Jeff Trull
  Guillaume Endignoux                   ilovezfs
  Daniel Godas-Lopez                    Franjo Ivancic
  Austin Seipp                          Daniel Komaromy
  Daniel Binderman                      Jonathan Metzman
  Vegard Nossum                         Jan Kneschke
  Kurt Roeckx                           Marcel Bohme
  Van-Thuan Pham                        Abhik Roychoudhury
  Joshua J. Drake                       Toby Hutton
  Rene Freingruber                      Sergey Davidoff
  Sami Liedes                           Craig Young
  Andrzej Jackowski                     Daniel Hodson
```

Thank you!

## 15) Contact

Questions? Concerns? Bug reports? Please use GitHub.

There is also a mailing list for the project; to join, send a mail to
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse
archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users).
@@ -1,75 +0,0 @@
#!/bin/sh
#
# american fuzzy lop - clang assembly normalizer
# ----------------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
# The idea for this wrapper comes from Ryan Govostes.
#
# Copyright 2013, 2014 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# This 'as' wrapper should allow you to instrument unruly, hand-written
# assembly with afl-as.
#
# Usage:
#
# export AFL_REAL_PATH=/path/to/directory/with/afl-as/
# AFL_PATH=/path/to/this/directory/ make clean all

if [ "$#" -lt "2" ]; then
  echo "[-] Error: this utility can't be called directly." 1>&2
  exit 1
fi

if [ "$AFL_REAL_PATH" = "" ]; then
  echo "[-] Error: AFL_REAL_PATH not set!" 1>&2
  exit 1
fi

if [ ! -x "$AFL_REAL_PATH/afl-as" ]; then
  echo "[-] Error: AFL_REAL_PATH does not contain the 'afl-as' binary." 1>&2
  exit 1
fi

unset __AFL_AS_CMDLINE __AFL_FNAME

# Split the command line: the last argument is the input .s file, everything
# else is passed through to afl-as verbatim.
while [ ! "$#" = "0" ]; do

  if [ "$#" = "1" ]; then
    __AFL_FNAME="$1"
  else
    __AFL_AS_CMDLINE="${__AFL_AS_CMDLINE} $1"
  fi

  shift

done

test "$TMPDIR" = "" && TMPDIR=/tmp

TMPFILE=`mktemp $TMPDIR/.afl-XXXXXXXXXX.s`

test "$TMPFILE" = "" && exit 1

# Normalize the hand-written assembly through clang's integrated assembler so
# that afl-as can parse it reliably.
clang -cc1as -filetype asm -output-asm-variant 0 "${__AFL_FNAME}" >"$TMPFILE"

ERR="$?"

if [ ! "$ERR" = "0" ]; then
  rm -f "$TMPFILE"
  exit $ERR
fi

# Hand the normalized file to the real afl-as, which injects the instrumentation.
"$AFL_REAL_PATH/afl-as" ${__AFL_AS_CMDLINE} "$TMPFILE"

ERR="$?"

rm -f "$TMPFILE"

exit "$ERR"
@@ -0,0 +1,43 @@
================
AFL dictionaries
================

  (See ../docs/README for the general instruction manual.)

This subdirectory contains a set of dictionaries that can be used in
conjunction with the -x option to allow the fuzzer to effortlessly explore the
grammar of some of the more verbose data formats or languages. The basic
principle behind the operation of fuzzer dictionaries is outlined in section 9
of the "main" README for the project.

Custom dictionaries can be added at will. They should consist of a
reasonably-sized set of rudimentary syntax units that the fuzzer will then try
to clobber together in various ways. Snippets between 2 and 16 bytes are usually
the sweet spot.

Custom dictionaries can be created in two ways:

  - By creating a new directory and placing each token in a separate file, in
    which case, there is no need to escape or otherwise format the data.

  - By creating a flat text file where tokens are listed one per line in the
    format of name="value". The alphanumeric name is ignored and can be omitted,
    although it is a convenient way to document the meaning of a particular
    token. The value must appear in quotes, with hex escaping (\xNN) applied to
    all non-printable, high-bit, or otherwise problematic characters (\\ and \"
    shorthands are recognized, too).
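
For the first, directory-based form, a quick setup could look like this (just a
sketch; the token files, directory names, and target are placeholders):

  $ mkdir gif_tokens/
  $ printf 'GIF' > gif_tokens/header_gif
  $ printf '89a' > gif_tokens/header_89a
  $ ./afl-fuzz -i testcase_dir -o findings_dir -x gif_tokens/ /path/to/program @@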

The fuzzer auto-selects the appropriate mode depending on whether the -x
parameter is a file or a directory.

In the file mode, every name field can be optionally followed by @<num>, e.g.:

  keyword_foo@1 = "foo"

Such entries will be loaded only if the requested dictionary level is equal or
higher than this number. The default level is zero; a higher value can be set
by appending @<num> to the dictionary file name, like so:

  -x path/to/dictionary.dct@2

Good examples of dictionaries can be found in xml.dict and png.dict.
@@ -0,0 +1,18 @@
#
# AFL dictionary for GIF images
# -----------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

header_87a="87a"
header_89a="89a"
header_gif="GIF"

marker_2c=","
marker_3b=";"

section_2101="!\x01\x12"
section_21f9="!\xf9\x04"
section_21fe="!\xfe"
section_21ff="!\xff\x11"
@@ -0,0 +1,160 @@
#
# AFL dictionary for HTML parsers (tags only)
# -------------------------------------------
#
# A basic collection of HTML tags likely to matter to HTML parsers. Does *not*
# include any attributes or attribute values.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

tag_a="<a>"
tag_abbr="<abbr>"
tag_acronym="<acronym>"
tag_address="<address>"
tag_annotation_xml="<annotation-xml>"
tag_applet="<applet>"
tag_area="<area>"
tag_article="<article>"
tag_aside="<aside>"
tag_audio="<audio>"
tag_b="<b>"
tag_base="<base>"
tag_basefont="<basefont>"
tag_bdi="<bdi>"
tag_bdo="<bdo>"
tag_bgsound="<bgsound>"
tag_big="<big>"
tag_blink="<blink>"
tag_blockquote="<blockquote>"
tag_body="<body>"
tag_br="<br>"
tag_button="<button>"
tag_canvas="<canvas>"
tag_caption="<caption>"
tag_center="<center>"
tag_cite="<cite>"
tag_code="<code>"
tag_col="<col>"
tag_colgroup="<colgroup>"
tag_data="<data>"
tag_datalist="<datalist>"
tag_dd="<dd>"
tag_del="<del>"
tag_desc="<desc>"
tag_details="<details>"
tag_dfn="<dfn>"
tag_dir="<dir>"
tag_div="<div>"
tag_dl="<dl>"
tag_dt="<dt>"
tag_em="<em>"
tag_embed="<embed>"
tag_fieldset="<fieldset>"
tag_figcaption="<figcaption>"
tag_figure="<figure>"
tag_font="<font>"
tag_footer="<footer>"
tag_foreignobject="<foreignobject>"
tag_form="<form>"
tag_frame="<frame>"
tag_frameset="<frameset>"
tag_h1="<h1>"
tag_h2="<h2>"
tag_h3="<h3>"
tag_h4="<h4>"
tag_h5="<h5>"
tag_h6="<h6>"
tag_head="<head>"
tag_header="<header>"
tag_hgroup="<hgroup>"
tag_hr="<hr>"
tag_html="<html>"
tag_i="<i>"
tag_iframe="<iframe>"
tag_image="<image>"
tag_img="<img>"
tag_input="<input>"
tag_ins="<ins>"
tag_isindex="<isindex>"
tag_kbd="<kbd>"
tag_keygen="<keygen>"
tag_label="<label>"
tag_legend="<legend>"
tag_li="<li>"
tag_link="<link>"
tag_listing="<listing>"
tag_main="<main>"
tag_malignmark="<malignmark>"
tag_map="<map>"
tag_mark="<mark>"
tag_marquee="<marquee>"
tag_math="<math>"
tag_menu="<menu>"
tag_menuitem="<menuitem>"
tag_meta="<meta>"
tag_meter="<meter>"
tag_mglyph="<mglyph>"
tag_mi="<mi>"
tag_mn="<mn>"
tag_mo="<mo>"
tag_ms="<ms>"
tag_mtext="<mtext>"
tag_multicol="<multicol>"
tag_nav="<nav>"
tag_nextid="<nextid>"
tag_nobr="<nobr>"
tag_noembed="<noembed>"
tag_noframes="<noframes>"
tag_noscript="<noscript>"
tag_object="<object>"
tag_ol="<ol>"
tag_optgroup="<optgroup>"
tag_option="<option>"
tag_output="<output>"
tag_p="<p>"
tag_param="<param>"
tag_plaintext="<plaintext>"
tag_pre="<pre>"
tag_progress="<progress>"
tag_q="<q>"
tag_rb="<rb>"
tag_rp="<rp>"
tag_rt="<rt>"
tag_rtc="<rtc>"
tag_ruby="<ruby>"
tag_s="<s>"
tag_samp="<samp>"
tag_script="<script>"
tag_section="<section>"
tag_select="<select>"
tag_small="<small>"
tag_source="<source>"
tag_spacer="<spacer>"
tag_span="<span>"
tag_strike="<strike>"
tag_strong="<strong>"
tag_style="<style>"
tag_sub="<sub>"
tag_summary="<summary>"
tag_sup="<sup>"
tag_svg="<svg>"
tag_table="<table>"
tag_tbody="<tbody>"
tag_td="<td>"
tag_template="<template>"
tag_textarea="<textarea>"
tag_tfoot="<tfoot>"
tag_th="<th>"
tag_thead="<thead>"
tag_time="<time>"
tag_title="<title>"
tag_tr="<tr>"
tag_track="<track>"
tag_tt="<tt>"
tag_u="<u>"
tag_ul="<ul>"
tag_var="<var>"
tag_video="<video>"
tag_wbr="<wbr>"
tag_xmp="<xmp>"
@@ -0,0 +1,22 @@
#
# AFL dictionary for JPEG images
# ------------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

header_jfif="JFIF\x00"
header_jfxx="JFXX\x00"

section_ffc0="\xff\xc0"
section_ffc2="\xff\xc2"
section_ffc4="\xff\xc4"
section_ffd0="\xff\xd0"
section_ffd8="\xff\xd8"
section_ffd9="\xff\xd9"
section_ffda="\xff\xda"
section_ffdb="\xff\xdb"
section_ffdd="\xff\xdd"
section_ffe0="\xff\xe0"
section_ffe1="\xff\xe1"
section_fffe="\xff\xfe"
@@ -0,0 +1,107 @@
#
# AFL dictionary for JavaScript
# -----------------------------
#
# Contains basic reserved keywords and syntax building blocks.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

keyword_arguments="arguments"
keyword_break="break"
keyword_case="case"
keyword_catch="catch"
keyword_const="const"
keyword_continue="continue"
keyword_debugger="debugger"
keyword_decodeURI="decodeURI"
keyword_default="default"
keyword_delete="delete"
keyword_do="do"
keyword_else="else"
keyword_escape="escape"
keyword_eval="eval"
keyword_export="export"
keyword_finally="finally"
keyword_for="for (a=0;a<2;a++)"
keyword_function="function"
keyword_if="if"
keyword_in="in"
keyword_instanceof="instanceof"
keyword_isNaN="isNaN"
keyword_let="let"
keyword_new="new"
keyword_parseInt="parseInt"
keyword_return="return"
keyword_switch="switch"
keyword_this="this"
keyword_throw="throw"
keyword_try="try"
keyword_typeof="typeof"
keyword_var="var"
keyword_void="void"
keyword_while="while"
keyword_with="with"

misc_1=" 1"
misc_a="a"
misc_array=" [1]"
misc_assign=" a=1"
misc_code_block=" {1}"
misc_colon_num=" 1:"
misc_colon_string=" 'a':"
misc_comma=" ,"
misc_comment_block=" /* */"
misc_comment_line=" //"
misc_cond=" 1?2:3"
misc_dec=" --"
misc_div=" /"
misc_equals=" ="
misc_fn=" a()"
misc_identical=" ==="
misc_inc=" ++"
misc_minus=" -"
misc_modulo=" %"
misc_parentheses=" ()"
misc_parentheses_1=" (1)"
misc_parentheses_1x4=" (1,1,1,1)"
misc_parentheses_a=" (a)"
misc_period="."
misc_plus=" +"
misc_plus_assign=" +="
misc_regex=" /a/g"
misc_rol=" <<<"
misc_semicolon=" ;"
misc_serialized_object=" {'a': 1}"
misc_string=" 'a'"
misc_unicode=" '\\u0001'"

object_Array=" Array"
object_Boolean=" Boolean"
object_Date=" Date"
object_Function=" Function"
object_Infinity=" Infinity"
object_Int8Array=" Int8Array"
object_Math=" Math"
object_NaN=" NaN"
object_Number=" Number"
object_Object=" Object"
object_RegExp=" RegExp"
object_String=" String"
object_Symbol=" Symbol"
object_false=" false"
object_null=" null"
object_true=" true"

prop_charAt=".charAt"
prop_concat=".concat"
prop_constructor=".constructor"
prop_destructor=".destructor"
prop_length=".length"
prop_match=".match"
prop_proto=".__proto__"
prop_prototype=".prototype"
prop_slice=".slice"
prop_toCode=".toCode"
prop_toString=".toString"
prop_valueOf=".valueOf"
@@ -0,0 +1,52 @@
#
# AFL dictionary for JSON
# -----------------------
#
# Just the very basics.
#
# Inspired by a dictionary by Jakub Wilk <jwilk@jwilk.net>
#

"0"
",0"
":0"
"0:"
"-1.2e+3"

"true"
"false"
"null"

"\"\""
",\"\""
":\"\""
"\"\":"

"{}"
",{}"
":{}"
"{\"\":0}"
"{{}}"

"[]"
",[]"
":[]"
"[0]"
"[[]]"

"''"
"\\"
"\\b"
"\\f"
"\\n"
"\\r"
"\\t"
"\\u0000"
"\\x00"
"\\0"
"\\uD800\\uDC00"
"\\uDBFF\\uDFFF"

"\"\":0"
"//"
"/**/"
@@ -0,0 +1,16 @@

#
# AFL dictionary for fuzzing Perl
# --------------------------------
#
# Created by @RandomDhiraj
#

"<:crlf"
"fwrite()"
"fread()"
":raw:utf8"
":raw:eol(LF)"
"Perl_invert()"
":raw:eol(CRLF)"
"Perl_PerlIO_eof()"
@@ -0,0 +1,38 @@
#
# AFL dictionary for PNG images
# -----------------------------
#
# Just the basic, standard-originating sections; does not include vendor
# extensions.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

header_png="\x89PNG\x0d\x0a\x1a\x0a"

section_IDAT="IDAT"
section_IEND="IEND"
section_IHDR="IHDR"
section_PLTE="PLTE"
section_bKGD="bKGD"
section_cHRM="cHRM"
section_fRAc="fRAc"
section_gAMA="gAMA"
section_gIFg="gIFg"
section_gIFt="gIFt"
section_gIFx="gIFx"
section_hIST="hIST"
section_iCCP="iCCP"
section_iTXt="iTXt"
section_oFFs="oFFs"
section_pCAL="pCAL"
section_pHYs="pHYs"
section_sBIT="sBIT"
section_sCAL="sCAL"
section_sPLT="sPLT"
section_sRGB="sRGB"
section_sTER="sTER"
section_tEXt="tEXt"
section_tIME="tIME"
section_tRNS="tRNS"
section_zTXt="zTXt"
@@ -0,0 +1,254 @@
#
# AFL dictionary for JS regex
# ---------------------------
#
# Contains various regular expressions.
#
# Created by Yang Guo <yangguo@chromium.org>
#
"?"
"abc"
"()"
"[]"
"abc|def"
"abc|def|ghi"
"^xxx$"
"ab\\b\\d\\bcd"
"\\w|\\d"
"a*?"
"abc+"
"abc+?"
"xyz?"
"xyz??"
"xyz{0,1}"
"xyz{0,1}?"
"xyz{93}"
"xyz{1,32}"
"xyz{1,32}?"
"xyz{1,}"
"xyz{1,}?"
"a\\fb\\nc\\rd\\te\\vf"
"a\\nb\\bc"
"(?:foo)"
"(?: foo )"
"foo|(bar|baz)|quux"
"foo(?=bar)baz"
"foo(?!bar)baz"
"foo(?<=bar)baz"
"foo(?<!bar)baz"
"()"
"(?=)"
"[]"
"[x]"
"[xyz]"
"[a-zA-Z0-9]"
"[-123]"
"[^123]"
"]"
"}"
"[a-b-c]"
"[x\\dz]"
"[\\d-z]"
"[\\d-\\d]"
"[z-\\d]"
"\\cj\\cJ\\ci\\cI\\ck\\cK"
"\\c!"
"\\c_"
"\\c~"
"[\\c!]"
"[\\c_]"
"[\\c~]"
"[\\ca]"
"[\\cz]"
"[\\cA]"
"[\\cZ]"
"[\\c1]"
"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
"\\8"
"\\9"
"\\11"
"\\11a"
"\\011"
"\\118"
"\\111"
"\\1111"
"(x)(x)(x)\\1"
"(x)(x)(x)\\2"
"(x)(x)(x)\\3"
"(x)(x)(x)\\4"
"(x)(x)(x)\\1*"
"(x)(x)(x)\\3*"
"(x)(x)(x)\\4*"
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
"(a)\\1"
"(a\\1)"
"(\\1a)"
"(\\2)(\\1)"
"(?=a){0,10}a"
"(?=a){1,10}a"
"(?=a){9,10}a"
"(?!a)?a"
"\\1(a)"
"(?!(a))\\1"
"(?!\\1(a\\1)\\1)\\1"
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
"[\\0]"
"[\\11]"
"[\\11a]"
"[\\011]"
"[\\00011]"
"[\\118]"
"[\\111]"
"[\\1111]"
"\\x60"
"\\x3z"
"\\c"
"\\u0034"
"\\u003z"
"foo[z]*"
"\\u{12345}"
"\\u{12345}\\u{23456}"
"\\u{12345}{3}"
"\\u{12345}*"
"\\ud808\\udf45*"
"[\\ud808\\udf45-\\ud809\\udccc]"
"a"
"a|b"
"a\\n"
"a$"
"a\\b!"
"a\\Bb"
"a*?"
"a?"
"a??"
"a{0,1}?"
"a{1,2}?"
"a+?"
"(a)"
"(a)\\1"
"(\\1a)"
"\\1(a)"
"a\\s"
"a\\S"
"a\\D"
"a\\w"
"a\\W"
"a."
"a\\q"
"a[a]"
"a[^a]"
"a[a-z]"
"a(?:b)"
"a(?=b)"
"a(?!b)"
"\\x60"
"\\u0060"
"\\cA"
"\\q"
"\\1112"
"(a)\\1"
"(?!a)?a\\1"
"(?:(?=a))a\\1"
"a{}"
"a{,}"
"a{"
"a{z}"
"a{12z}"
"a{12,"
"a{12,3b"
"{}"
"{,}"
"{"
"{z}"
"{1z}"
"{12,"
"{12,3b"
"a"
"abc"
"a[bc]d"
"a|bc"
"ab|c"
"a||bc"
"(?:ab)"
"(?:ab|cde)"
"(?:ab)|cde"
"(ab)"
"(ab|cde)"
"(ab)\\1"
"(ab|cde)\\1"
"(?:ab)?"
"(?:ab)+"
"a?"
"a+"
"a??"
"a*?"
"a+?"
"(?:a?)?"
"(?:a+)?"
"(?:a?)+"
"(?:a*)+"
"(?:a+)+"
"(?:a?)*"
"(?:a*)*"
"(?:a+)*"
"a{0}"
"(?:a+){0,0}"
"a*b"
"a+b"
"a*b|c"
"a+b|c"
"(?:a{5,1000000}){3,1000000}"
"(?:ab){4,7}"
"a\\bc"
"a\\sc"
"a\\Sc"
"a(?=b)c"
"a(?=bbb|bb)c"
"a(?!bbb|bb)c"
"\xe2\x81\xa3"
"[\xe2\x81\xa3]"
"\xed\xb0\x80"
"\xed\xa0\x80"
"(\xed\xb0\x80)\x01"
"((\xed\xa0\x80))\x02"
"\xf0\x9f\x92\xa9"
"\x01"
"\x0f"
"[-\xf0\x9f\x92\xa9]+"
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
"(?<=)"
"(?<=a)"
"(?<!)"
"(?<!a)"
"(?<a>)"
"(?<a>.)"
"(?<a>.)\\k<a>"
"\\p{Script=Greek}"
"\\P{sc=Greek}"
"\\p{Script_Extensions=Greek}"
"\\P{scx=Greek}"
"\\p{General_Category=Decimal_Number}"
"\\P{gc=Decimal_Number}"
"\\p{gc=Nd}"
"\\P{Decimal_Number}"
"\\p{Nd}"
"\\P{Any}"
"\\p{Changes_When_NFKC_Casefolded}"
"(?:a?)??"
"a?)"xyz{93}"
"{93}"
"a{12za?)?"
"[\x8f]"
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\x92\xa9-\xf4\x8f\xbf\xbf]"
"[\x92\xa9-\xf4\x8f\xbf\xbf]"
"\\1\\2(b\\1\\2))\\2)\\1"
"\\1\\2(a(?:\\1\\2))\\2)\\1"
"?:\\1"
"\\1(b\\1\\2))\\2)\\1"
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
"foo(?=bar)bar)baz"
"fo(?o(?o(?o(?=bar)baz"
"foo(?=bar)baz"
"foo(?=bar)bar)az"
@ -0,0 +1,282 @@
|
|||||||
|
#
|
||||||
|
# AFL dictionary for SQL
|
||||||
|
# ----------------------
|
||||||
|
#
|
||||||
|
# Modeled based on SQLite documentation, contains some number of SQLite
|
||||||
|
# extensions. Other dialects of SQL may benefit from customized dictionaries.
|
||||||
|
#
|
||||||
|
# If you append @1 to the file name when loading this dictionary, afl-fuzz
|
||||||
|
# will also additionally load a selection of pragma keywords that are very
|
||||||
|
# specific to SQLite (and are probably less interesting from the security
|
||||||
|
# standpoint, because they are usually not allowed in non-privileged
|
||||||
|
# contexts).
|
||||||
|
#
|
||||||
|
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||||
|
#

function_abs=" abs(1)"
function_avg=" avg(1)"
function_changes=" changes()"
function_char=" char(1)"
function_coalesce=" coalesce(1,1)"
function_count=" count(1)"
function_date=" date(1,1,1)"
function_datetime=" datetime(1,1,1)"
function_decimal=" decimal(1,1)"
function_glob=" glob(1,1)"
function_group_concat=" group_concat(1,1)"
function_hex=" hex(1)"
function_ifnull=" ifnull(1,1)"
function_instr=" instr(1,1)"
function_julianday=" julianday(1,1,1)"
function_last_insert_rowid=" last_insert_rowid()"
function_length=" length(1)"
function_like=" like(1,1)"
function_likelihood=" likelihood(1,1)"
function_likely=" likely(1)"
function_load_extension=" load_extension(1,1)"
function_lower=" lower(1)"
function_ltrim=" ltrim(1,1)"
function_max=" max(1,1)"
function_min=" min(1,1)"
function_nullif=" nullif(1,1)"
function_printf=" printf(1,1)"
function_quote=" quote(1)"
function_random=" random()"
function_randomblob=" randomblob(1)"
function_replace=" replace(1,1,1)"
function_round=" round(1,1)"
function_rtrim=" rtrim(1,1)"
function_soundex=" soundex(1)"
function_sqlite_compileoption_get=" sqlite_compileoption_get(1)"
function_sqlite_compileoption_used=" sqlite_compileoption_used(1)"
function_sqlite_source_id=" sqlite_source_id()"
function_sqlite_version=" sqlite_version()"
function_strftime=" strftime(1,1,1,1)"
function_substr=" substr(1,1,1)"
function_sum=" sum(1)"
function_time=" time(1,1,1)"
function_total=" total(1)"
function_total_changes=" total_changes()"
function_trim=" trim(1,1)"
function_typeof=" typeof(1)"
function_unicode=" unicode(1)"
function_unlikely=" unlikely(1)"
function_upper=" upper(1)"
function_varchar=" varchar(1)"
function_zeroblob=" zeroblob(1)"

keyword_ABORT="ABORT"
keyword_ACTION="ACTION"
keyword_ADD="ADD"
keyword_AFTER="AFTER"
keyword_ALL="ALL"
keyword_ALTER="ALTER"
keyword_ANALYZE="ANALYZE"
keyword_AND="AND"
keyword_AS="AS"
keyword_ASC="ASC"
keyword_ATTACH="ATTACH"
keyword_AUTOINCREMENT="AUTOINCREMENT"
keyword_BEFORE="BEFORE"
keyword_BEGIN="BEGIN"
keyword_BETWEEN="BETWEEN"
keyword_BY="BY"
keyword_CASCADE="CASCADE"
keyword_CASE="CASE"
keyword_CAST="CAST"
keyword_CHECK="CHECK"
keyword_COLLATE="COLLATE"
keyword_COLUMN="COLUMN"
keyword_COMMIT="COMMIT"
keyword_CONFLICT="CONFLICT"
keyword_CONSTRAINT="CONSTRAINT"
keyword_CREATE="CREATE"
keyword_CROSS="CROSS"
keyword_CURRENT_DATE="CURRENT_DATE"
keyword_CURRENT_TIME="CURRENT_TIME"
keyword_CURRENT_TIMESTAMP="CURRENT_TIMESTAMP"
keyword_DATABASE="DATABASE"
keyword_DEFAULT="DEFAULT"
keyword_DEFERRABLE="DEFERRABLE"
keyword_DEFERRED="DEFERRED"
keyword_DELETE="DELETE"
keyword_DESC="DESC"
keyword_DETACH="DETACH"
keyword_DISTINCT="DISTINCT"
keyword_DROP="DROP"
keyword_EACH="EACH"
keyword_ELSE="ELSE"
keyword_END="END"
keyword_ESCAPE="ESCAPE"
keyword_EXCEPT="EXCEPT"
keyword_EXCLUSIVE="EXCLUSIVE"
keyword_EXISTS="EXISTS"
keyword_EXPLAIN="EXPLAIN"
keyword_FAIL="FAIL"
keyword_FOR="FOR"
keyword_FOREIGN="FOREIGN"
keyword_FROM="FROM"
keyword_FULL="FULL"
keyword_GLOB="GLOB"
keyword_GROUP="GROUP"
keyword_HAVING="HAVING"
keyword_IF="IF"
keyword_IGNORE="IGNORE"
keyword_IMMEDIATE="IMMEDIATE"
keyword_IN="IN"
keyword_INDEX="INDEX"
keyword_INDEXED="INDEXED"
keyword_INITIALLY="INITIALLY"
keyword_INNER="INNER"
keyword_INSERT="INSERT"
keyword_INSTEAD="INSTEAD"
keyword_INTERSECT="INTERSECT"
keyword_INTO="INTO"
keyword_IS="IS"
keyword_ISNULL="ISNULL"
keyword_JOIN="JOIN"
keyword_KEY="KEY"
keyword_LEFT="LEFT"
keyword_LIKE="LIKE"
keyword_LIMIT="LIMIT"
keyword_MATCH="MATCH"
keyword_NATURAL="NATURAL"
keyword_NO="NO"
keyword_NOT="NOT"
keyword_NOTNULL="NOTNULL"
keyword_NULL="NULL"
keyword_OF="OF"
keyword_OFFSET="OFFSET"
keyword_ON="ON"
keyword_OR="OR"
keyword_ORDER="ORDER"
keyword_OUTER="OUTER"
keyword_PLAN="PLAN"
keyword_PRAGMA="PRAGMA"
keyword_PRIMARY="PRIMARY"
keyword_QUERY="QUERY"
keyword_RAISE="RAISE"
keyword_RECURSIVE="RECURSIVE"
keyword_REFERENCES="REFERENCES"
keyword_REGEXP="REGEXP"
keyword_REINDEX="REINDEX"
keyword_RELEASE="RELEASE"
keyword_RENAME="RENAME"
keyword_REPLACE="REPLACE"
keyword_RESTRICT="RESTRICT"
keyword_RIGHT="RIGHT"
keyword_ROLLBACK="ROLLBACK"
keyword_ROW="ROW"
keyword_SAVEPOINT="SAVEPOINT"
keyword_SELECT="SELECT"
keyword_SET="SET"
keyword_TABLE="TABLE"
keyword_TEMP="TEMP"
keyword_TEMPORARY="TEMPORARY"
keyword_THEN="THEN"
keyword_TO="TO"
keyword_TRANSACTION="TRANSACTION"
keyword_TRIGGER="TRIGGER"
keyword_UNION="UNION"
keyword_UNIQUE="UNIQUE"
keyword_UPDATE="UPDATE"
keyword_USING="USING"
keyword_VACUUM="VACUUM"
keyword_VALUES="VALUES"
keyword_VIEW="VIEW"
keyword_VIRTUAL="VIRTUAL"
keyword_WHEN="WHEN"
keyword_WHERE="WHERE"
keyword_WITH="WITH"
keyword_WITHOUT="WITHOUT"

operator_concat=" || "
operator_above_eq=" >="

snippet_1eq1=" 1=1"
snippet_at=" @1"
snippet_backticks=" `a`"
snippet_blob=" blob"
snippet_brackets=" [a]"
snippet_colon=" :1"
snippet_comment=" /* */"
snippet_date="2001-01-01"
snippet_dollar=" $1"
snippet_dotref=" a.b"
snippet_fmtY="%Y"
snippet_int=" int"
snippet_neg1=" -1"
snippet_pair=" a,b"
snippet_parentheses=" (1)"
snippet_plus2days="+2 days"
snippet_qmark=" ?1"
snippet_semicolon=" ;"
snippet_star=" *"
snippet_string_pair=" \"a\",\"b\""

string_dbl_q=" \"a\""
string_escaped_q=" 'a''b'"
string_single_q=" 'a'"

pragma_application_id@1=" application_id"
pragma_auto_vacuum@1=" auto_vacuum"
pragma_automatic_index@1=" automatic_index"
pragma_busy_timeout@1=" busy_timeout"
pragma_cache_size@1=" cache_size"
pragma_cache_spill@1=" cache_spill"
pragma_case_sensitive_like@1=" case_sensitive_like"
pragma_checkpoint_fullfsync@1=" checkpoint_fullfsync"
pragma_collation_list@1=" collation_list"
pragma_compile_options@1=" compile_options"
pragma_count_changes@1=" count_changes"
pragma_data_store_directory@1=" data_store_directory"
pragma_database_list@1=" database_list"
pragma_default_cache_size@1=" default_cache_size"
pragma_defer_foreign_keys@1=" defer_foreign_keys"
pragma_empty_result_callbacks@1=" empty_result_callbacks"
pragma_encoding@1=" encoding"
pragma_foreign_key_check@1=" foreign_key_check"
pragma_foreign_key_list@1=" foreign_key_list"
pragma_foreign_keys@1=" foreign_keys"
pragma_freelist_count@1=" freelist_count"
pragma_full_column_names@1=" full_column_names"
pragma_fullfsync@1=" fullfsync"
pragma_ignore_check_constraints@1=" ignore_check_constraints"
pragma_incremental_vacuum@1=" incremental_vacuum"
pragma_index_info@1=" index_info"
pragma_index_list@1=" index_list"
pragma_integrity_check@1=" integrity_check"
pragma_journal_mode@1=" journal_mode"
pragma_journal_size_limit@1=" journal_size_limit"
pragma_legacy_file_format@1=" legacy_file_format"
pragma_locking_mode@1=" locking_mode"
pragma_max_page_count@1=" max_page_count"
pragma_mmap_size@1=" mmap_size"
pragma_page_count@1=" page_count"
pragma_page_size@1=" page_size"
pragma_parser_trace@1=" parser_trace"
pragma_query_only@1=" query_only"
pragma_quick_check@1=" quick_check"
pragma_read_uncommitted@1=" read_uncommitted"
pragma_recursive_triggers@1=" recursive_triggers"
pragma_reverse_unordered_selects@1=" reverse_unordered_selects"
pragma_schema_version@1=" schema_version"
pragma_secure_delete@1=" secure_delete"
pragma_short_column_names@1=" short_column_names"
pragma_shrink_memory@1=" shrink_memory"
pragma_soft_heap_limit@1=" soft_heap_limit"
pragma_stats@1=" stats"
pragma_synchronous@1=" synchronous"
pragma_table_info@1=" table_info"
pragma_temp_store@1=" temp_store"
pragma_temp_store_directory@1=" temp_store_directory"
pragma_threads@1=" threads"
pragma_user_version@1=" user_version"
pragma_vdbe_addoptrace@1=" vdbe_addoptrace"
pragma_vdbe_debug@1=" vdbe_debug"
pragma_vdbe_listing@1=" vdbe_listing"
pragma_vdbe_trace@1=" vdbe_trace"
pragma_wal_autocheckpoint@1=" wal_autocheckpoint"
pragma_wal_checkpoint@1=" wal_checkpoint"
pragma_writable_schema@1=" writable_schema"
@ -0,0 +1,51 @@
#
# AFL dictionary for TIFF images
# ------------------------------
#
# Just the basic, standard-originating sections; does not include vendor
# extensions.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

header_ii="II*\x00"
header_mm="MM\x00*"

section_100="\x00\x01"
section_101="\x01\x01"
section_102="\x02\x01"
section_103="\x03\x01"
section_106="\x06\x01"
section_107="\x07\x01"
section_10D="\x0d\x01"
section_10E="\x0e\x01"
section_10F="\x0f\x01"
section_110="\x10\x01"
section_111="\x11\x01"
section_112="\x12\x01"
section_115="\x15\x01"
section_116="\x16\x01"
section_117="\x17\x01"
section_11A="\x1a\x01"
section_11B="\x1b\x01"
section_11C="\x1c\x01"
section_11D="\x1d\x01"
section_11E="\x1e\x01"
section_11F="\x1f\x01"
section_122="\"\x01"
section_123="#\x01"
section_124="$\x01"
section_125="%\x01"
section_128="(\x01"
section_129=")\x01"
section_12D="-\x01"
section_131="1\x01"
section_132="2\x01"
section_13B=";\x01"
section_13C="<\x01"
section_13D="=\x01"
section_13E=">\x01"
section_13F="?\x01"
section_140="@\x01"
section_FE="\xfe\x00"
section_FF="\xff\x00"
@ -0,0 +1,20 @@
#
# AFL dictionary for WebP images
# ------------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

header_RIFF="RIFF"
header_WEBP="WEBP"

section_ALPH="ALPH"
section_ANIM="ANIM"
section_ANMF="ANMF"
section_EXIF="EXIF"
section_FRGM="FRGM"
section_ICCP="ICCP"
section_VP8="VP8 "
section_VP8L="VP8L"
section_VP8X="VP8X"
section_XMP="XMP "
@ -0,0 +1,72 @@
#
# AFL dictionary for XML
# ----------------------
#
# Several basic syntax elements and attributes, modeled on libxml2.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#

attr_encoding=" encoding=\"1\""
attr_generic=" a=\"1\""
attr_href=" href=\"1\""
attr_standalone=" standalone=\"no\""
attr_version=" version=\"1\""
attr_xml_base=" xml:base=\"1\""
attr_xml_id=" xml:id=\"1\""
attr_xml_lang=" xml:lang=\"1\""
attr_xml_space=" xml:space=\"1\""
attr_xmlns=" xmlns=\"1\""

entity_builtin="&lt;"
entity_decimal="&#1;"
entity_external="&a;"
entity_hex="&#x1;"

string_any="ANY"
string_brackets="[]"
string_cdata="CDATA"
string_col_fallback=":fallback"
string_col_generic=":a"
string_col_include=":include"
string_dashes="--"
string_empty="EMPTY"
string_empty_dblquotes="\"\""
string_empty_quotes="''"
string_entities="ENTITIES"
string_entity="ENTITY"
string_fixed="#FIXED"
string_id="ID"
string_idref="IDREF"
string_idrefs="IDREFS"
string_implied="#IMPLIED"
string_nmtoken="NMTOKEN"
string_nmtokens="NMTOKENS"
string_notation="NOTATION"
string_parentheses="()"
string_pcdata="#PCDATA"
string_percent="%a"
string_public="PUBLIC"
string_required="#REQUIRED"
string_schema=":schema"
string_system="SYSTEM"
string_ucs4="UCS-4"
string_utf16="UTF-16"
string_utf8="UTF-8"
string_xmlns="xmlns:"

tag_attlist="<!ATTLIST"
tag_cdata="<![CDATA["
tag_close="</a>"
tag_doctype="<!DOCTYPE"
tag_element="<!ELEMENT"
tag_entity="<!ENTITY"
tag_ignore="<![IGNORE["
tag_include="<![INCLUDE["
tag_notation="<!NOTATION"
tag_open="<a>"
tag_open_close="<a />"
tag_open_exclamation="<!"
tag_open_q="<?"
tag_sq2_close="]]>"
tag_xml_q="<?xml?>"
@ -0,0 +1,202 @@

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@ -0,0 +1,183 @@
=========================
Installation instructions
=========================

This document provides basic installation instructions and discusses known
issues for a variety of platforms. See README for the general instruction
manual.

1) Linux on x86
---------------

This platform is expected to work well. Compile the program with:

$ make

You can start using the fuzzer without installation, but it is also possible to
install it with:

# make install

There are no special dependencies to speak of; you will need GNU make and a
working compiler (gcc or clang). Some of the optional scripts bundled with the
program may depend on bash, gdb, and similar basic tools.

If you are using clang, please review llvm_mode/README.llvm; the LLVM
integration mode can offer substantial performance gains compared to the
traditional approach.

You may have to change several settings to get optimal results (most notably,
disable crash reporting utilities and switch to a different CPU governor), but
afl-fuzz will guide you through that if necessary.

2) OpenBSD, FreeBSD, NetBSD on x86
----------------------------------

Similarly to Linux, these platforms are expected to work well and are
regularly tested. Compile everything with GNU make:

$ gmake

Note that BSD make will *not* work; if you do not have gmake on your system,
please install it first. As on Linux, you can use the fuzzer itself without
installation, or install it with:

# gmake install

Keep in mind that if you are using csh as your shell, the syntax of some of the
shell commands given in the README and other docs will be different.

The llvm_mode requires a dynamically linked, fully-operational installation of
clang. At least on FreeBSD, the clang binaries are static and do not include
some of the essential tools, so if you want to make it work, you may need to
follow the instructions in llvm_mode/README.llvm.

Beyond that, everything should work as advertised.

The QEMU mode is currently supported only on Linux. I think it's just a QEMU
problem: I couldn't get a vanilla copy of user-mode emulation support to work
correctly on BSD at all.

3) MacOS X on x86
-----------------

MacOS X should work, but there are some gotchas due to the idiosyncrasies of
the platform. On top of this, I have limited release testing capabilities
and depend mostly on user feedback.

To build AFL, install Xcode and follow the general instructions for Linux.

The Xcode 'gcc' tool is just a wrapper for clang, so be sure to use afl-clang
to compile any instrumented binaries; afl-gcc will fail unless you have GCC
installed from another source (in which case, please specify AFL_CC and
AFL_CXX to point to the "real" GCC binaries).
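
A minimal sketch of that setup (the gcc-9 path below is only an example of
where a Homebrew- or MacPorts-installed GCC might live):

$ AFL_CC=/usr/local/bin/gcc-9 AFL_CXX=/usr/local/bin/g++-9 \
  ./afl-gcc test-instr.c -o test-instr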

Only 64-bit compilation will work on the platform; porting the 32-bit
instrumentation would require a fair amount of work due to the way OS X
handles relocations, and today, virtually all MacOS X boxes are 64-bit.

The crash reporting daemon that comes by default with MacOS X will cause
problems with fuzzing. You need to turn it off by following the instructions
provided here: http://goo.gl/CCcd5u

The fork() semantics on OS X are a bit unusual compared to other Unix systems
and definitely don't look POSIX-compliant. This means two things:

- Fuzzing will probably be slower than on Linux. In fact, some folks report
considerable performance gains by running the jobs inside a Linux VM on
MacOS X.

- Some non-portable, platform-specific code may be incompatible with the
AFL forkserver. If you run into any problems, set AFL_NO_FORKSRV=1 in the
environment before starting afl-fuzz.

User emulation mode of QEMU does not appear to be supported on MacOS X, so
black-box instrumentation mode (-Q) will not work.

The llvm_mode requires a fully-operational installation of clang. The one that
comes with Xcode is missing some of the essential headers and helper tools.
See llvm_mode/README.llvm for advice on how to build the compiler from scratch.

4) Linux or *BSD on non-x86 systems
-----------------------------------

Standard build will fail on non-x86 systems, but you should be able to
leverage two other options:

- The LLVM mode (see llvm_mode/README.llvm), which does not rely on
x86-specific assembly shims. It's fast and robust, but requires a
complete installation of clang.

- The QEMU mode (see qemu_mode/README.qemu), which can be also used for
fuzzing cross-platform binaries. It's slower and more fragile, but
can be used even when you don't have the source for the tested app.

If you're not sure what you need, you need the LLVM mode. To get it, try:

$ AFL_NO_X86=1 gmake && gmake -C llvm_mode

...and compile your target program with afl-clang-fast or afl-clang-fast++
instead of the traditional afl-gcc or afl-clang wrappers.

5) Solaris on x86
-----------------

The fuzzer reportedly works on Solaris, but I have not tested this first-hand,
and the user base is fairly small, so I don't have a lot of feedback.

To get the ball rolling, you will need to use GNU make and GCC or clang. I'm
being told that the stock version of GCC that comes with the platform does not
work properly due to its reliance on a hardcoded location for 'as' (completely
ignoring the -B parameter or $PATH).

To fix this, you may want to build stock GCC from source, like so:

$ ./configure --prefix=$HOME/gcc --with-gnu-as --with-gnu-ld \
  --with-gmp-include=/usr/include/gmp --with-mpfr-include=/usr/include/mpfr
$ make
$ sudo make install

Do *not* specify --with-as=/usr/gnu/bin/as - this will produce a GCC binary that
ignores the -B flag and you will be back to square one.

Note that Solaris reportedly comes with crash reporting enabled, which causes
problems with crashes being misinterpreted as hangs, similarly to the gotchas
for Linux and MacOS X. AFL does not auto-detect crash reporting on this
particular platform, so you may need to run the following command:

$ coreadm -d global -d global-setid -d process -d proc-setid \
  -d kzone -d log

User emulation mode of QEMU is not available on Solaris, so black-box
instrumentation mode (-Q) will not work.

6) Everything else
------------------

You're on your own. On POSIX-compliant systems, you may be able to compile and
run the fuzzer, and the LLVM mode may offer a way to instrument non-x86 code.

The fuzzer will not run on Windows. It will also not work under Cygwin. It
could be ported to the latter platform fairly easily, but it's a pretty bad
idea, because Cygwin is extremely slow. It makes much more sense to use
something like VirtualBox to run a hardware-accelerated Linux VM; it will run
around 20x faster. If you have a *really* compelling use case for Cygwin, let
me know.

Although Android on x86 should theoretically work, the stock kernel may have
SHM support compiled out, and if so, you may have to address that issue first.
It's possible that all you need is this workaround:

https://github.com/pelya/android-shmem

Joshua J. Drake notes that the Android linker adds a shim that automatically
intercepts SIGSEGV and related signals. To fix this issue and be able to see
crashes, you need to put this at the beginning of the fuzzed program:

signal(SIGILL, SIG_DFL);
signal(SIGABRT, SIG_DFL);
signal(SIGBUS, SIG_DFL);
signal(SIGFPE, SIG_DFL);
signal(SIGSEGV, SIG_DFL);

You may need to #include <signal.h> first.
@ -0,0 +1,49 @@
=====================
AFL quick start guide
=====================

You should read docs/README. It's pretty short. If you really can't, here's
how to hit the ground running:

1) Compile AFL with 'make'. If the build fails, see docs/INSTALL for tips.

2) Find or write a reasonably fast and simple program that takes data from
a file or stdin, processes it in a test-worthy way, then exits cleanly.
If testing a network service, modify it to run in the foreground and read
from stdin. When fuzzing a format that uses checksums, comment out the
checksum verification code, too.

The program must crash properly when a fault is encountered. Watch out for
custom SIGSEGV or SIGABRT handlers and background processes. For tips on
detecting non-crashing flaws, see section 11 in docs/README.

3) Compile the program / library to be fuzzed using afl-gcc. A common way to
do this would be:

CC=/path/to/afl-gcc CXX=/path/to/afl-g++ ./configure --disable-shared
make clean all

If the program build fails, ping <afl-users@googlegroups.com>.

4) Get a small but valid input file that makes sense to the program. When
fuzzing verbose syntax (SQL, HTTP, etc.), create a dictionary as described in
dictionaries/README.dictionaries, too.

5) If the program reads from stdin, run 'afl-fuzz' like so:

./afl-fuzz -i testcase_dir -o findings_dir -- \
/path/to/tested/program [...program's cmdline...]

If the program takes input from a file, you can put @@ in the program's
command line; AFL will put an auto-generated file name in there for you.
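
For example (paths and program names here are just placeholders), a target
that expects a file argument would be started along these lines:

./afl-fuzz -i testcase_dir -o findings_dir -- \
/path/to/tested/program @@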

6) Investigate anything shown in red in the fuzzer UI by promptly consulting
docs/status_screen.txt.

That's it. Sit back, relax, and - time permitting - try to skim through the
following files:

- docs/README - A general introduction to AFL,
- docs/perf_tips.txt - Simple tips on how to fuzz more quickly,
- docs/status_screen.txt - An explanation of the tidbits shown in the UI,
- docs/parallel_fuzzing.txt - Advice on running AFL on multiple cores.
@ -0,0 +1,281 @@
=======================
Environmental variables
=======================

This document discusses the environment variables used by American Fuzzy Lop
to expose various exotic functions that may be (rarely) useful for power
users or for some types of custom fuzzing setups. See README for the general
instruction manual.

1) Settings for afl-gcc, afl-clang, and afl-as
----------------------------------------------

Because they can't directly accept command-line options, the compile-time
tools make fairly broad use of environmental variables:

- Setting AFL_HARDEN automatically adds code hardening options when invoking
the downstream compiler. This currently includes -D_FORTIFY_SOURCE=2 and
-fstack-protector-all. The setting is useful for catching non-crashing
memory bugs at the expense of a very slight (sub-5%) performance loss.
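
For instance (assuming the project has already been set up to build with
afl-gcc, as in the quick start guide), a hardened instrumented build can be
as simple as:

AFL_HARDEN=1 make clean all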

- By default, the wrapper appends -O3 to optimize builds. Very rarely, this
will cause problems in programs built with -Werror, simply because -O3
enables more thorough code analysis and can spew out additional warnings.
To disable optimizations, set AFL_DONT_OPTIMIZE.

- Setting AFL_USE_ASAN automatically enables ASAN, provided that your
compiler supports that. Note that fuzzing with ASAN is mildly challenging
- see notes_for_asan.txt.

(You can also enable MSAN via AFL_USE_MSAN; ASAN and MSAN come with the
same gotchas; the modes are mutually exclusive. UBSAN and other exotic
sanitizers are not officially supported yet, but are easy to get to work
by hand.)

- Setting AFL_CC, AFL_CXX, and AFL_AS lets you use alternate downstream
compilation tools, rather than the default 'clang', 'gcc', or 'as' binaries
in your $PATH.

- AFL_PATH can be used to point afl-gcc to an alternate location of afl-as.
One possible use of this is experimental/clang_asm_normalize/, which lets
you instrument hand-written assembly when compiling clang code by plugging
a normalizer into the chain. (There is no equivalent feature for GCC.)

- Setting AFL_INST_RATIO to a percentage between 0 and 100 controls the
probability of instrumenting every branch. This is (very rarely) useful
when dealing with exceptionally complex programs that saturate the output
bitmap. Examples include v8, ffmpeg, and perl.

(If this ever happens, afl-fuzz will warn you ahead of time by
displaying the "bitmap density" field in fiery red.)

Setting AFL_INST_RATIO to 0 is a valid choice. This will instrument only
the transitions between function entry points, but not individual branches.
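
For example (again assuming a build driven by afl-gcc, so that afl-as sees
the variable), instrumenting roughly one branch in ten would look like:

AFL_INST_RATIO=10 make clean all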

- AFL_NO_BUILTIN causes the compiler to generate code suitable for use with
libtokencap.so (but perhaps running a bit slower than without the flag).

- TMPDIR is used by afl-as for temporary files; if this variable is not set,
the tool defaults to /tmp.

- Setting AFL_KEEP_ASSEMBLY prevents afl-as from deleting instrumented
assembly files. Useful for troubleshooting problems or understanding how
the tool works. To get them in a predictable place, try something like:

mkdir assembly_here
TMPDIR=$PWD/assembly_here AFL_KEEP_ASSEMBLY=1 make clean all

- Setting AFL_QUIET will prevent afl-cc and afl-as banners from being
displayed during compilation, in case you find them distracting.

2) Settings for afl-clang-fast
------------------------------

The native LLVM instrumentation helper accepts a subset of the settings
discussed in section #1, with the exception of:

- AFL_AS, since this toolchain does not directly invoke GNU as.

- TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are
created.

Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc,
because functions are *not* instrumented unconditionally - so low values
will have a more striking effect. For this tool, 0 is not a valid choice.

3) Settings for afl-fuzz
------------------------

The main fuzzer binary accepts several options that disable a couple of sanity
checks or alter some of the more exotic semantics of the tool:

- Setting AFL_SKIP_CPUFREQ skips the check for CPU scaling policy. This is
useful if you can't change the defaults (e.g., no root access to the
system) and are OK with some performance loss.

- Setting AFL_NO_FORKSRV disables the forkserver optimization, reverting to
a fork + execve() call for every tested input. This is useful mostly when
working with unruly libraries that create threads or do other crazy
things when initializing (before the instrumentation has a chance to run).

Note that this setting inhibits some of the user-friendly diagnostics
normally done when starting up the forkserver and causes a pretty
significant performance drop.

- AFL_EXIT_WHEN_DONE causes afl-fuzz to terminate when all existing paths
have been fuzzed and there were no new finds for a while. This would be
normally indicated by the cycle counter in the UI turning green. May be
convenient for some types of automated jobs.

- Setting AFL_NO_AFFINITY disables attempts to bind to a specific CPU core
on Linux systems. This slows things down, but lets you run more instances
of afl-fuzz than would be prudent (if you really want to).

- AFL_SKIP_CRASHES causes AFL to tolerate crashing files in the input
queue. This can help with rare situations where a program crashes only
intermittently, but it's not really recommended under normal operating
conditions.

- Setting AFL_HANG_TMOUT allows you to specify a different timeout for
deciding if a particular test case is a "hang". The default is 1 second
or the value of the -t parameter, whichever is larger. Dialing the value
down can be useful if you are very concerned about slow inputs, or if you
don't want AFL to spend too much time classifying that stuff and would
rather have it rapidly put all timeouts in that bin.

- AFL_NO_ARITH causes AFL to skip most of the deterministic arithmetic steps.
This can be useful to speed up the fuzzing of text-based file formats.

- AFL_SHUFFLE_QUEUE randomly reorders the input queue on startup. Requested
by some users for unorthodox parallelized fuzzing setups, but not
advisable otherwise.

- When developing custom instrumentation on top of afl-fuzz, you can use
AFL_SKIP_BIN_CHECK to inhibit the checks for non-instrumented binaries
and shell scripts; and AFL_DUMB_FORKSRV in conjunction with the -n
setting to instruct afl-fuzz to still follow the fork server protocol
without expecting any instrumentation data in return.

- When running in the -M or -S mode, setting AFL_IMPORT_FIRST causes the
fuzzer to import test cases from other instances before doing anything
else. This makes the "own finds" counter in the UI more accurate.
Beyond counter aesthetics, not much else should change.

- Setting AFL_POST_LIBRARY allows you to configure a postprocessor for
mutated files - say, to fix up checksums. See experimental/post_library/
for more.

- AFL_FAST_CAL makes the calibration stage about 2.5x faster (albeit less
precise), which can help when starting a session against a slow target.

- The CPU widget shown at the bottom of the screen is fairly simplistic and
may complain of high load prematurely, especially on systems with low core
counts. To avoid the alarming red color, you can set AFL_NO_CPU_RED.

- In QEMU mode (-Q), AFL_PATH will be searched for afl-qemu-trace.

- Setting AFL_PRELOAD causes AFL to set LD_PRELOAD for the target binary
without disrupting the afl-fuzz process itself. This is useful, among other
things, for bootstrapping libdislocator.so.
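
A typical sketch (the library path and target command are illustrative) for
bootstrapping libdislocator.so would be:

AFL_PRELOAD=/path/to/libdislocator.so ./afl-fuzz -i testcase_dir \
-o findings_dir -- /path/to/tested/program @@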

- Setting AFL_NO_UI inhibits the UI altogether, and just periodically prints
some basic stats. This behavior is also automatically triggered when the
output from afl-fuzz is redirected to a file or to a pipe.

- If you are Jakub, you may need AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES.
Others need not apply.

- Benchmarking only: AFL_BENCH_JUST_ONE causes the fuzzer to exit after
processing the first queue entry; and AFL_BENCH_UNTIL_CRASH causes it to
exit soon after the first crash is found.

4) Settings for afl-qemu-trace
------------------------------

The QEMU wrapper used to instrument binary-only code supports several settings:

- It is possible to set AFL_INST_RATIO to skip the instrumentation on some
of the basic blocks, which can be useful when dealing with very complex
binaries.

- Setting AFL_INST_LIBS causes the translator to also instrument the code
inside any dynamically linked libraries (notably including glibc).

- The underlying QEMU binary will recognize any standard "user space
emulation" variables (e.g., QEMU_STACK_SIZE), but there should be no
reason to touch them.

5) Settings for afl-cmin
------------------------

The corpus minimization script offers very little customization:

- Setting AFL_PATH offers a way to specify the location of afl-showmap
and afl-qemu-trace (the latter only in -Q mode).

- AFL_KEEP_TRACES makes the tool keep traces and other metadata used for
minimization and normally deleted at exit. The files can be found in
<out_dir>/.traces/*.

- AFL_ALLOW_TMP permits this and some other scripts to run in /tmp. This is
a modest security risk on multi-user systems with rogue users, but should
be safe on dedicated fuzzing boxes.

6) Settings for afl-tmin
------------------------

Virtually nothing to play with. Well, in QEMU mode (-Q), AFL_PATH will be
searched for afl-qemu-trace. In addition to this, TMPDIR may be used if a
temporary file can't be created in the current working directory.

You can specify AFL_TMIN_EXACT if you want afl-tmin to require execution paths
to match when minimizing crashes. This will make minimization less useful, but
may prevent the tool from "jumping" from one crashing condition to another in
very buggy software. You probably want to combine it with the -e flag.

7) Settings for afl-analyze
---------------------------

You can set AFL_ANALYZE_HEX to get file offsets printed as hexadecimal instead
of decimal.

8) Settings for libdislocator.so
--------------------------------

The library honors four environmental variables:

- AFL_LD_LIMIT_MB caps the maximum heap usage permitted by the library,
in megabytes. The default value is 1 GB. Once this is exceeded,
allocations will return NULL.

- AFL_LD_HARD_FAIL alters the behavior by calling abort() on excessive
allocations, thus causing what AFL would perceive as a crash. Useful for
programs that are supposed to maintain a specific memory footprint.

- AFL_LD_VERBOSE causes the library to output some diagnostic messages
that may be useful for pinpointing the cause of any observed issues.

- AFL_LD_NO_CALLOC_OVER inhibits abort() on calloc() overflows. Most
of the common allocators check for that internally and return NULL, so
it's a security risk only in more exotic setups.

9) Settings for libtokencap.so
------------------------------

This library accepts AFL_TOKEN_FILE to indicate the location to which the
discovered tokens should be written.

10) Third-party variables set by afl-fuzz & other tools
-------------------------------------------------------

Several variables are not directly interpreted by afl-fuzz, but are set to
optimal values if not already present in the environment:

- By default, LD_BIND_NOW is set to speed up fuzzing by forcing the
linker to do all the work before the fork server kicks in. You can
override this by setting LD_BIND_LAZY beforehand, but it is almost
certainly pointless.

- By default, ASAN_OPTIONS are set to:

abort_on_error=1
detect_leaks=0
symbolize=0
allocator_may_return_null=1

If you want to set your own options, be sure to include abort_on_error=1 -
otherwise, the fuzzer will not be able to detect crashes in the tested
app. Similarly, include symbolize=0, since without it, AFL may have
difficulty telling crashes and hangs apart.
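
An illustrative custom setting that keeps the two mandatory flags (the other
values simply mirror the defaults listed above, and the target command is a
placeholder) might look like:

ASAN_OPTIONS=abort_on_error=1:symbolize=0:detect_leaks=0:allocator_may_return_null=1 \
./afl-fuzz -i testcase_dir -o findings_dir -- /path/to/tested/program @@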

- In the same vein, by default, MSAN_OPTIONS are set to:

exit_code=86 (required for legacy reasons)
abort_on_error=1
symbolize=0
msan_track_origins=0
allocator_may_return_null=1

Be sure to include the first one when customizing anything, since some
MSAN versions don't call abort() on error, and we need a way to detect
faults.
@ -0,0 +1,147 @@
================
Historical notes
================

This doc talks about the rationale of some of the high-level design decisions
for American Fuzzy Lop. It's adapted from a discussion with Rob Graham.
See README for the general instruction manual, and technical_details.txt for
additional implementation-level insights.

1) Influences
-------------

In short, afl-fuzz is inspired chiefly by the work done by Tavis Ormandy back
in 2007. Tavis did some very persuasive experiments using gcov block coverage
to select optimal test cases out of a large corpus of data, and then using
them as a starting point for traditional fuzzing workflows.

(By "persuasive", I mean: netting a significant number of interesting
vulnerabilities.)

In parallel to this, both Tavis and I were interested in evolutionary fuzzing.
Tavis had his experiments, and I was working on a tool called bunny-the-fuzzer,
released sometime in 2007.

Bunny used a generational algorithm not much different from afl-fuzz, but
also tried to reason about the relationship between various input bits and
the internal state of the program, with hopes of deriving some additional value
from that. The reasoning / correlation part was probably in part inspired by
other projects done around the same time by Will Drewry and Chris Evans.

The state correlation approach sounded very sexy on paper, but ultimately, made
the fuzzer complicated, brittle, and cumbersome to use; every other target
program would require a tweak or two. Because Bunny didn't fare a whole lot
better than less sophisticated brute-force tools, I eventually decided to write
it off. You can still find its original documentation at:

https://code.google.com/p/bunny-the-fuzzer/wiki/BunnyDoc

There has been a fair amount of independent work, too. Most notably, a few
weeks earlier that year, Jared DeMott had a Defcon presentation about a
coverage-driven fuzzer that relied on coverage as a fitness function.

Jared's approach was by no means identical to what afl-fuzz does, but it was in
the same ballpark. His fuzzer tried to explicitly solve for the maximum coverage
with a single input file; in comparison, afl simply selects for cases that do
something new (which yields better results - see technical_details.txt).

A few years later, Gabriel Campana released fuzzgrind, a tool that relied purely
on Valgrind and a constraint solver to maximize coverage without any brute-force
bits; and Microsoft Research folks talked extensively about their still
non-public, solver-based SAGE framework.

In the past six years or so, I've also seen a fair number of academic papers
that dealt with smart fuzzing (focusing chiefly on symbolic execution) and a
couple of papers that discussed proof-of-concept applications of genetic
algorithms with the same goals in mind. I'm unconvinced how practical most of
these experiments were; I suspect that many of them suffer from the
bunny-the-fuzzer's curse of being cool on paper and in carefully designed
experiments, but failing the ultimate test of being able to find new,
worthwhile security bugs in otherwise well-fuzzed, real-world software.

In some ways, the baseline that the "cool" solutions have to compete against is
a lot more impressive than it may seem, making it difficult for competitors to
stand out. For a singular example, check out the work by Gynvael and Mateusz
Jurczyk, applying "dumb" fuzzing to ffmpeg, a prominent and security-critical
component of modern browsers and media players:

http://googleonlinesecurity.blogspot.com/2014/01/ffmpeg-and-thousand-fixes.html

Effortlessly getting comparable results with state-of-the-art symbolic execution
in equally complex software still seems fairly unlikely, and hasn't been
demonstrated in practice so far.

But I digress; ultimately, attribution is hard, and glorifying the fundamental
concepts behind AFL is probably a waste of time. The devil is very much in the
often-overlooked details, which brings us to...

2) Design goals for afl-fuzz
----------------------------

In short, I believe that the current implementation of afl-fuzz takes care of
several itches that seemed impossible to scratch with other tools:

1) Speed. It's genuinely hard to compete with brute force when your "smart"
approach is resource-intensive. If your instrumentation makes it 10x more
likely to find a bug, but runs 100x slower, your users are getting a bad
deal.

To avoid starting with a handicap, afl-fuzz is meant to let you fuzz most of
the intended targets at roughly their native speed - so even if it doesn't
add value, you do not lose much.

On top of this, the tool leverages instrumentation to actually reduce the
amount of work in a couple of ways: for example, by carefully trimming the
corpus or skipping non-functional but non-trimmable regions in the input
files.

2) Rock-solid reliability. It's hard to compete with brute force if your
approach is brittle and fails unexpectedly. Automated testing is attractive
because it's simple to use and scalable; anything that goes against these
principles is an unwelcome trade-off and means that your tool will be used
less often and with less consistent results.

Most of the approaches based on symbolic execution, taint tracking, or
complex syntax-aware instrumentation are currently fairly unreliable with
real-world targets. Perhaps more importantly, their failure modes can render
them strictly worse than "dumb" tools, and such degradation can be difficult
for less experienced users to notice and correct.

In contrast, afl-fuzz is designed to be rock solid, chiefly by keeping it
simple. In fact, at its core, it's designed to be just a very good
traditional fuzzer with a wide range of interesting, well-researched
strategies to go by. The fancy parts just help it focus the effort in
places where it matters the most.

3) Simplicity. The author of a testing framework is probably the only person
who truly understands the impact of all the settings offered by the tool -
and who can dial them in just right. Yet, even the most rudimentary fuzzer
frameworks often come with countless knobs and fuzzing ratios that need to
be guessed by the operator ahead of time. This can do more harm than
good.

AFL is designed to avoid this as much as possible. The three knobs you
can play with are the output file, the memory limit, and the ability to
override the default, auto-calibrated timeout. The rest is just supposed to
work. When it doesn't, user-friendly error messages outline the probable
causes and workarounds, and get you back on track right away.

4) Chainability. Most general-purpose fuzzers can't be easily employed
against resource-hungry or interaction-heavy tools, necessitating the
creation of custom in-process fuzzers or the investment of massive CPU
power (most of which is wasted on tasks not directly related to the code
we actually want to test).

AFL tries to scratch this itch by allowing users to use more lightweight
targets (e.g., standalone image parsing libraries) to create small
corpora of interesting test cases that can be fed into a manual testing
process or a UI harness later on.

As mentioned in technical_details.txt, AFL does all this not by systematically
applying a single overarching CS concept, but by experimenting with a variety
|
||||||
|
of small, complementary methods that were shown to reliably yields results
|
||||||
|
better than chance. The use of instrumentation is a part of that toolkit, but is
|
||||||
|
far from being the most important one.
|
||||||
|
|
||||||
|
Ultimately, what matters is that afl-fuzz is designed to find cool bugs - and
|
||||||
|
has a pretty robust track record of doing just that.
|

# ===================
# AFL "Life Pro Tips"
# ===================
#
# Bite-sized advice for those who understand the basics, but can't be bothered
# to read or memorize every other piece of documentation for AFL.
#

%

Get more bang for your buck by using fuzzing dictionaries.
See dictionaries/README.dictionaries to learn how.

%

You can get the most out of your hardware by parallelizing AFL jobs.
See docs/parallel_fuzzing.txt for step-by-step tips.

%

Improve the odds of spotting memory corruption bugs with libdislocator.so!
It's easy. Consult libdislocator/README.dislocator for usage tips.

%

Want to understand how your target parses a particular input file?
Try the bundled afl-analyze tool; it's got colors and all!

%

You can visually monitor the progress of your fuzzing jobs.
Run the bundled afl-plot utility to generate browser-friendly graphs.

%

Need to monitor AFL jobs programmatically? Check out the fuzzer_stats file
in the AFL output dir or try afl-whatsup.

%

Puzzled by something showing up in red or purple in the AFL UI?
It could be important - consult docs/status_screen.txt right away!

%

Know your target? Convert it to persistent mode for a huge performance gain!
Consult section #5 in llvm_mode/README.llvm for tips.

%

Using clang? Check out llvm_mode/ for a faster alternative to afl-gcc!

%

Did you know that AFL can fuzz closed-source or cross-platform binaries?
Check out qemu_mode/README.qemu for more.

%

Did you know that afl-fuzz can minimize any test case for you?
Try the bundled afl-tmin tool - and get small repro files fast!

%

Not sure if a crash is exploitable? AFL can help you figure it out. Specify
-C to enable the peruvian were-rabbit mode. See section #10 in README for more.

%

Trouble dealing with a machine uprising? Relax, we've all been there.
Find essential survival tips at http://lcamtuf.coredump.cx/prep/.

%

AFL-generated corpora can be used to power other testing processes.
See section #2 in README for inspiration - it tends to pay off!

%

Want to automatically spot non-crashing memory handling bugs?
Try running an AFL-generated corpus through ASAN, MSAN, or Valgrind.

%

Good selection of input files is critical to a successful fuzzing job.
See section #5 in README (or docs/perf_tips.txt) for pro tips.

%

You can improve the odds of automatically spotting stack corruption issues.
Specify AFL_HARDEN=1 in the environment to enable hardening flags.

%

Bumping into problems with non-reproducible crashes? It happens, but usually
isn't hard to diagnose. See section #7 in README for tips.

%

Fuzzing is not just about memory corruption issues in the codebase. Add some
sanity-checking assert() / abort() statements to effortlessly catch logic bugs.

%

Hey kid... pssst... want to figure out how AFL really works?
Check out docs/technical_details.txt for all the gory details in one place!

%

There's a ton of third-party helper tools designed to work with AFL!
Be sure to check out docs/sister_projects.txt before writing your own.

%

Need to fuzz the command-line arguments of a particular program?
You can find a simple solution in experimental/argv_fuzzing.

%

Attacking a format that uses checksums? Remove the checksum-checking code or
use a postprocessor! See experimental/post_library/ for more.

%

Dealing with a very slow target or hoping for instant results? Specify -d
when calling afl-fuzz!

%

==================================
Notes for using ASAN with afl-fuzz
==================================

This file discusses some of the caveats for fuzzing under ASAN, and suggests
a handful of alternatives. See README for the general instruction manual.

1) Short version
----------------

ASAN on 64-bit systems requests a lot of memory in a way that can't be easily
distinguished from a misbehaving program bent on crashing your system.

Because of this, fuzzing with ASAN is recommended only in four scenarios:

  - On 32-bit systems, where we can always enforce a reasonable memory limit
    (-m 800 or so is a good starting point),

  - On 64-bit systems, only if you can do one of the following:

    - Compile the binary in 32-bit mode (gcc -m32),

    - Precisely gauge memory needs using http://jwilk.net/software/recidivm ,

    - Limit the memory available to the process using cgroups on Linux (see
      experimental/asan_cgroups).

To compile with ASAN, set AFL_USE_ASAN=1 before calling 'make clean all'. The
afl-gcc / afl-clang wrappers will pick that up and add the appropriate flags.
Note that ASAN is incompatible with -static, so be mindful of that.

(You can also use AFL_USE_MSAN=1 to enable MSAN instead.)

There is also the option of generating a corpus using a non-ASAN binary, and
then feeding it to an ASAN-instrumented one to check for bugs. This is faster,
and can give you somewhat comparable results. You can also try using
libdislocator (see libdislocator/README.dislocator in the parent directory) as
a lightweight and hassle-free (but less thorough) alternative.
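
As a rough sketch of that workflow (the target name, paths, and build system
are made up for illustration; adjust them to your project):

  $ cd ~/src/target
  $ AFL_USE_ASAN=1 CC=/path/to/afl-gcc ./configure
  $ AFL_USE_ASAN=1 make clean all

  # Replay a queue produced earlier by a non-ASAN fuzzing job; this assumes
  # the target reads its input from stdin:
  $ for f in /path/to/out_dir/queue/id*; do ./target_asan <"$f"; done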

2) Long version
---------------

ASAN allocates a huge region of virtual address space for bookkeeping purposes.
Most of this is never actually accessed, so the OS never has to allocate any
real pages of memory for the process, and the VM grabbed by ASAN is essentially
"free" - but the mapping counts against the standard OS-enforced limit
(RLIMIT_AS, aka ulimit -v).

On our end, afl-fuzz tries to protect you from processes that go off the rails
and start consuming all the available memory in a vain attempt to parse a
malformed input file. This happens surprisingly often, so enforcing such a limit
is important for almost any fuzzer: the alternative is for the kernel OOM
handler to step in and start killing random processes to free up resources.
Needless to say, that's not a very nice prospect to live with.

Unfortunately, un*x systems offer no portable way to limit the number of
pages actually given to a process in a way that distinguishes between that
and the harmless "land grab" done by ASAN. In principle, there are three
standard ways to limit the size of the heap:

  - The RLIMIT_AS mechanism (ulimit -v) caps the size of the virtual space -
    but as noted, this pays no attention to the number of pages actually
    in use by the process, and doesn't help us here.

  - The RLIMIT_DATA mechanism (ulimit -d) seems like a good fit, but it applies
    only to the traditional sbrk() / brk() methods of requesting heap space;
    modern allocators, including the one in glibc, routinely rely on mmap()
    instead, and circumvent this limit completely.

  - Finally, the RLIMIT_RSS limit (ulimit -m) sounds like what we need, but
    doesn't work on Linux - mostly because nobody felt like implementing it.

There are also cgroups, but they are Linux-specific, not universally available
even on Linux systems, and they require root permissions to set up; I'm a bit
hesitant to make afl-fuzz require root permissions just for that. That said,
if you are on Linux and want to use cgroups, check out the contributed script
that ships in experimental/asan_cgroups/.

In settings where cgroups aren't available, we have no nice, portable way to
avoid counting the ASAN allocation toward the limit. On 32-bit systems, or for
binaries compiled in 32-bit mode (-m32), this is not a big deal: ASAN needs
around 600-800 MB or so, depending on the compiler - so all you need to do is
to specify a -m value that is a bit higher than that.

On 64-bit systems, the situation is more murky, because the ASAN allocation
is completely outlandish - around 17.5 TB in older versions, and closer to
20 TB with the newest ones. The actual amount of memory on your system is
(probably!) just a tiny fraction of that - so unless you dial the limit in
with surgical precision, you will get no protection from OOM bugs.

On my system, the amount of memory grabbed by ASAN with a slightly older
version of gcc is around 17,825,850 MB; for the newest clang, it's 20,971,600.
But there is no guarantee that these numbers are stable, and if you get them
wrong by "just" a couple of gigs or so, you will be at risk.

To get the precise number, you can use the recidivm tool developed by Jakub
Wilk (http://jwilk.net/software/recidivm). In the absence of this, ASAN is *not*
recommended when fuzzing 64-bit binaries, unless you are confident that they
are robust and enforce reasonable memory limits (in which case, you can
specify '-m none' when calling afl-fuzz).

Using recidivm or running with no limits aside, there are two other decent
alternatives: build a corpus of test cases using a non-ASAN binary, and then
examine them with ASAN, Valgrind, or other heavy-duty tools in a more
controlled setting; or compile the target program with -m32 (32-bit mode)
if your system supports that.

3) Interactions with the QEMU mode
----------------------------------

ASAN, MSAN, and other sanitizers appear to be incompatible with QEMU user
emulation, so please do not try to use them with the -Q option; QEMU doesn't
seem to appreciate the shadow VM trick used by these tools, and will likely
just allocate all your physical memory, then crash.

4) ASAN and OOM crashes
-----------------------

By default, ASAN treats memory allocation failures as fatal errors, immediately
causing the program to crash. Since this is a departure from normal POSIX
semantics (and creates the appearance of security issues in otherwise
properly-behaving programs), we try to disable this by specifying
allocator_may_return_null=1 in ASAN_OPTIONS.
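
If you later reproduce crashes by hand with the same ASAN-instrumented binary,
you may want to mirror roughly the same settings yourself. A sketch (the crash
file path is hypothetical, and the exact option set is a matter of taste):

  $ ASAN_OPTIONS=abort_on_error=1:symbolize=1:allocator_may_return_null=1 \
      ./target_asan </path/to/out_dir/crashes/crash_file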

Unfortunately, it's been reported that this setting still causes ASAN to
trigger phantom crashes in situations where the standard allocator would
simply return NULL. If this is interfering with your fuzzing jobs, you may
want to cc: yourself on this bug:

  https://bugs.llvm.org/show_bug.cgi?id=22026

5) What about UBSAN?
--------------------

Some folks expressed interest in fuzzing with UBSAN. This isn't officially
supported, because many installations of UBSAN don't offer a consistent way
to abort() on fault conditions or to terminate with a distinctive exit code.

That said, some versions of the library can be binary-patched to address this
issue, while newer releases support explicit compile-time flags - see this
mailing list thread for tips:

  https://groups.google.com/forum/#!topic/afl-users/GyeSBJt4M38

=========================
Tips for parallel fuzzing
=========================

This document talks about synchronizing afl-fuzz jobs on a single machine
or across a fleet of systems. See README for the general instruction manual.

1) Introduction
---------------

Every copy of afl-fuzz will take up one CPU core. This means that on an
n-core system, you can almost always run around n concurrent fuzzing jobs with
virtually no performance hit (you can use the afl-gotcpu tool to make sure).

In fact, if you rely on just a single job on a multi-core system, you will
be underutilizing the hardware. So, parallelization is usually the right
way to go.

When targeting multiple unrelated binaries or using the tool in "dumb" (-n)
mode, it is perfectly fine to just start up several fully separate instances
of afl-fuzz. The picture gets more complicated when you want to have multiple
fuzzers hammering a common target: if a hard-to-hit but interesting test case
is synthesized by one fuzzer, the remaining instances will not be able to use
that input to guide their work.

To help with this problem, afl-fuzz offers a simple way to synchronize test
cases on the fly.

2) Single-system parallelization
--------------------------------

If you wish to parallelize a single job across multiple cores on a local
system, simply create a new, empty output directory ("sync dir") that will be
shared by all the instances of afl-fuzz; and then come up with a naming scheme
for every instance - say, "fuzzer01", "fuzzer02", etc.

Run the first one ("master", -M) like this:

  $ ./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 [...other stuff...]

...and then, start up secondary (-S) instances like this:

  $ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 [...other stuff...]
  $ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer03 [...other stuff...]

Each fuzzer will keep its state in a separate subdirectory, like so:

  /path/to/sync_dir/fuzzer01/

Each instance will also periodically rescan the top-level sync directory
for any test cases found by other fuzzers - and will incorporate them into
its own fuzzing when they are deemed interesting enough.

The difference between the -M and -S modes is that the master instance will
still perform deterministic checks, while the secondary instances will
proceed straight to random tweaks. If you don't want to do deterministic
fuzzing at all, it's OK to run all instances with -S. With very slow or complex
targets, or when running heavily parallelized jobs, this is usually a good plan.

Note that running multiple -M instances is wasteful, although there is
experimental support for parallelizing the deterministic checks. To leverage
that, you need to create -M instances like so:

  $ ./afl-fuzz -i testcase_dir -o sync_dir -M masterA:1/3 [...]
  $ ./afl-fuzz -i testcase_dir -o sync_dir -M masterB:2/3 [...]
  $ ./afl-fuzz -i testcase_dir -o sync_dir -M masterC:3/3 [...]

...where the first value after ':' is the sequential ID of a particular master
instance (starting at 1), and the second value is the total number of fuzzers to
distribute the deterministic fuzzing across. Note that if you boot up fewer
fuzzers than indicated by the second number passed to -M, you may end up with
poor coverage.

You can also monitor the progress of your jobs from the command line with the
provided afl-whatsup tool. When the instances are no longer finding new paths,
it's probably time to stop.

WARNING: Exercise caution when explicitly specifying the -f option. Each fuzzer
must use a separate temporary file; otherwise, things will go south. One safe
example may be:

  $ ./afl-fuzz [...] -S fuzzer10 -f file10.txt ./fuzzed/binary @@
  $ ./afl-fuzz [...] -S fuzzer11 -f file11.txt ./fuzzed/binary @@
  $ ./afl-fuzz [...] -S fuzzer12 -f file12.txt ./fuzzed/binary @@

This is not a concern if you use @@ without -f and let afl-fuzz come up with the
file name.

3) Multi-system parallelization
-------------------------------

The basic operating principle for multi-system parallelization is similar to
the mechanism explained in section 2. The key difference is that you need to
write a simple script that performs two actions:

  - Uses SSH with authorized_keys to connect to every machine and retrieve
    a tar archive of the /path/to/sync_dir/<fuzzer_id>/queue/ directories for
    every <fuzzer_id> local to the machine. It's best to use a naming scheme
    that includes the host name in the fuzzer ID, so that you can do something
    like:

      for s in {1..10}; do
        ssh user@host${s} "tar -czf - sync/host${s}_fuzzid*/[qf]*" >host${s}.tgz
      done

  - Distributes and unpacks these files on all the remaining machines, e.g.:

      for s in {1..10}; do
        for d in {1..10}; do
          test "$s" = "$d" && continue
          ssh user@host${d} 'tar -kxzf -' <host${s}.tgz
        done
      done

There is an example of such a script in experimental/distributed_fuzzing/;
you can also find a more featured, experimental tool developed by
Martijn Bogaard at:

  https://github.com/MartijnB/disfuzz-afl

Another client-server implementation from Richo Healey is:

  https://github.com/richo/roving

Note that these third-party tools are unsafe to run on systems exposed to the
Internet or to untrusted users.

When developing custom test case sync code, there are several optimizations
to keep in mind:

  - The synchronization does not have to happen very often; running the
    task every 30 minutes or so may be perfectly fine.

  - There is no need to synchronize crashes/ or hangs/; you only need to
    copy over queue/* (and ideally, also fuzzer_stats).

  - It is not necessary (and not advisable!) to overwrite existing files;
    the -k option in tar is a good way to avoid that.

  - There is no need to fetch directories for fuzzers that are not running
    locally on a particular machine, and were simply copied over onto that
    system during earlier runs.

  - For large fleets, you will want to consolidate tarballs for each host,
    as this will let you use n SSH connections for sync, rather than n*(n-1).

    You may also want to implement staged synchronization. For example, you
    could have 10 groups of systems, with group 1 pushing test cases only
    to group 2; group 2 pushing them only to group 3; and so on, with group
    10 eventually feeding back to group 1.

    This arrangement would allow interesting test cases to propagate across
    the fleet without having to copy every fuzzer queue to every single host.

  - You do not want a "master" instance of afl-fuzz on every system; you should
    run them all with -S, and just designate a single process somewhere within
    the fleet to run with -M.

It is *not* advisable to skip the synchronization script and run the fuzzers
directly on a network filesystem; unexpected latency and unkillable processes
in I/O wait state can mess things up.

4) Remote monitoring and data collection
----------------------------------------

You can use screen, nohup, tmux, or something equivalent to run remote
instances of afl-fuzz. If you redirect the program's output to a file, it will
automatically switch from a fancy UI to more limited status reports. There is
also basic machine-readable information always written to the fuzzer_stats file
in the output directory. Locally, that information can be interpreted with
afl-whatsup.
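
For example, a secondary instance started remotely could look like this (a
sketch that follows the naming scheme from section 2):

  $ nohup ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 \
      ./fuzzed/binary @@ >fuzzer02.log 2>&1 &
  $ ./afl-whatsup sync_dir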

In principle, you can use the status screen of the master (-M) instance to
monitor the overall fuzzing progress and decide when to stop. In this
mode, the most important signal is just that no new paths are being found
for a longer while. If you do not have a master instance, just pick any
single secondary instance to watch and go by that.

You can also rely on that instance's output directory to collect the
synthesized corpus that covers all the noteworthy paths discovered anywhere
within the fleet. Secondary (-S) instances do not require any special
monitoring, other than just making sure that they are up.

Keep in mind that crashing inputs are *not* automatically propagated to the
master instance, so you may still want to monitor for crashes fleet-wide
from within your synchronization or health checking scripts (see afl-whatsup).

5) Asymmetric setups
--------------------

It is perhaps worth noting that all of the following is permitted:

  - Running afl-fuzz in conjunction with other guided tools that can extend
    coverage (e.g., via concolic execution). Third-party tools simply need to
    follow the protocol described above for pulling new test cases from
    out_dir/<fuzzer_id>/queue/* and writing their own finds to sequentially
    numbered id:nnnnnn files in out_dir/<ext_tool_id>/queue/*.

  - Running some of the synchronized fuzzers with different (but related)
    target binaries. For example, simultaneously stress-testing several
    different JPEG parsers (say, IJG jpeg and libjpeg-turbo) while sharing
    the discovered test cases can have synergistic effects and improve the
    overall coverage.

    (In this case, running one -M instance per binary is a good plan.)

  - Having some of the fuzzers invoke the binary in different ways.
    For example, 'djpeg' supports several DCT modes, configurable with
    a command-line flag, while 'dwebp' supports incremental and one-shot
    decoding. In some scenarios, going after multiple distinct modes and then
    pooling test cases will improve coverage.

  - Much less convincingly, running the synchronized fuzzers with different
    starting test cases (e.g., progressive and standard JPEG) or dictionaries.
    The synchronization mechanism ensures that the test sets will get fairly
    homogeneous over time, but it introduces some initial variability.

=================================
Tips for performance optimization
=================================

This file provides tips for troubleshooting slow or wasteful fuzzing jobs.
See README for the general instruction manual.

1) Keep your test cases small
-----------------------------

This is probably the single most important step to take! Large test cases do
not merely take more time and memory to be parsed by the tested binary, but
also make the fuzzing process dramatically less efficient in several other
ways.

To illustrate, let's say that you're randomly flipping bits in a file, one bit
at a time. Let's assume that if you flip bit #47, you will hit a security bug;
flipping any other bit just results in an invalid document.

Now, if your starting test case is 100 bytes long, you will have a 71% chance of
triggering the bug within the first 1,000 execs - not bad! But if the test case
is 1 kB long, the probability that we will randomly hit the right pattern in
the same timeframe goes down to 11%. And if it has 10 kB of non-essential
cruft, the odds plunge to 1%.
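
(For the record, these numbers follow from simple arithmetic: with an L-byte
file - i.e., 8*L bits - and a single "magic" bit, the odds of hitting it at
least once in 1,000 uniformly random one-bit flips are approximately:

  P = 1 - (1 - 1/(8*L))^1000

...which works out to roughly 71% for L = 100 bytes, 11% for 1 kB, and 1% for
10 kB, matching the figures above.)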

On top of that, with larger inputs, the binary may now be running 5-10x
slower than before - so the overall drop in fuzzing efficiency may easily be
as high as 500x or so.

In practice, this means that you shouldn't fuzz image parsers with your
vacation photos. Generate a tiny 16x16 picture instead, and run it through
jpegtran or pngcrunch for good measure. The same goes for most other types
of documents.

There are plenty of small starting test cases in ../testcases/* - try them out
or submit new ones!

If you want to start with a larger, third-party corpus, run afl-cmin with an
aggressive timeout on that data set first.
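
For example (directory names are made up; -t is the timeout in milliseconds):

  $ ./afl-cmin -i big_corpus/ -o trimmed_corpus/ -t 200 -- ./target_binary @@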

2) Use a simpler target
-----------------------

Consider using a simpler target binary in your fuzzing work. For example, for
image formats, bundled utilities such as djpeg, readpng, or gifhisto are
considerably (10-20x) faster than the convert tool from ImageMagick - all while
exercising roughly the same library-level image parsing code.

Even if you don't have a lightweight harness for a particular target, remember
that you can always use another, related library to generate a corpus that will
then be manually fed to a more resource-hungry program later on.

3) Use LLVM instrumentation
---------------------------

When fuzzing slow targets, you can gain a 2x performance improvement by using
the LLVM-based instrumentation mode described in llvm_mode/README.llvm. Note
that this mode requires the use of clang and will not work with GCC.

The LLVM mode also offers a "persistent", in-process fuzzing mode that can
work well for certain types of self-contained libraries, and for fast targets,
can offer performance gains up to 5-10x; and a "deferred fork server" mode
that can offer huge benefits for programs with high startup overhead. Both
modes require you to edit the source code of the fuzzed program, but the
changes often amount to just strategically placing a single line or two.
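
Switching a typical autoconf-based target over to the LLVM mode might look
like this (paths are made up; the llvm_mode binaries are built separately in
the AFL source tree):

  $ make -C llvm_mode                            # in the AFL source tree
  $ CC=/path/to/afl/afl-clang-fast ./configure   # in the target's tree
  $ make clean all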

4) Profile and optimize the binary
----------------------------------

Check for any parameters or settings that obviously improve performance. For
example, the djpeg utility that comes with IJG jpeg and libjpeg-turbo can be
called with:

  -dct fast -nosmooth -onepass -dither none -scale 1/4

...and that will speed things up. There is a corresponding drop in the quality
of decoded images, but it's probably not something you care about.

In some programs, it is possible to disable output altogether, or at least use
an output format that is computationally inexpensive. For example, with image
transcoding tools, converting to a BMP file will be a lot faster than to PNG.

With some laid-back parsers, enabling "strict" mode (i.e., bailing out after
the first error) may result in smaller files and improved run time without
sacrificing coverage; for example, for sqlite, you may want to specify -bail.

If the program is still too slow, you can use strace -tt or an equivalent
profiling tool to see if the targeted binary is doing anything silly.
Sometimes, you can speed things up simply by specifying /dev/null as the
config file, or disabling some compile-time features that aren't really needed
for the job (try ./configure --help). One of the notoriously resource-consuming
things would be calling other utilities via exec*(), popen(), system(), or
equivalent calls; for example, tar can invoke external decompression tools
when it decides that the input file is a compressed archive.

Some programs may also intentionally call sleep(), usleep(), or nanosleep();
vim is a good example of that. Other programs may attempt fsync() and so on.
There are third-party libraries that make it easy to get rid of such code,
e.g.:

  https://launchpad.net/libeatmydata
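
A sketch of how such a library could be pulled in for the fuzzed process (the
.so path is an assumption - it varies between distributions, and some ship an
'eatmydata' wrapper script instead; AFL_PRELOAD makes afl-fuzz set LD_PRELOAD
for the target binary only):

  $ AFL_PRELOAD=/usr/lib/libeatmydata.so ./afl-fuzz -i in_dir -o out_dir ./target @@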

In programs that are slow due to unavoidable initialization overhead, you may
want to try the LLVM deferred forkserver mode (see llvm_mode/README.llvm),
which can give you speed gains up to 10x, as mentioned above.

Last but not least, if you are using ASAN and the performance is unacceptable,
consider turning it off for now, and manually examining the generated corpus
with an ASAN-enabled binary later on.

5) Instrument just what you need
--------------------------------

Instrument just the libraries you actually want to stress-test right now, one
at a time. Let the program use system-wide, non-instrumented libraries for
any functionality you don't actually want to fuzz. For example, in most
cases, it doesn't make sense to instrument libgmp just because you're testing
a crypto app that relies on it for bignum math.

Beware of programs that come with oddball third-party libraries bundled with
their source code (Spidermonkey is a good example of this). Check ./configure
options to use non-instrumented system-wide copies instead.

6) Parallelize your fuzzers
---------------------------

The fuzzer is designed to need ~1 core per job. This means that on a, say,
4-core system, you can easily run four parallel fuzzing jobs with relatively
little performance hit. For tips on how to do that, see parallel_fuzzing.txt.

The afl-gotcpu utility can help you understand if you still have idle CPU
capacity on your system. (It won't tell you about memory bandwidth, cache
misses, or similar factors, but they are less likely to be a concern.)

7) Keep memory use and timeouts in check
----------------------------------------

If you have increased the -m or -t limits more than truly necessary, consider
dialing them back down.

For programs that are nominally very fast, but get sluggish for some inputs,
you can also try setting -t values that are more punishing than what afl-fuzz
dares to use on its own. On fast and idle machines, going down to -t 5 may be
a viable plan.

The -m parameter is worth looking at, too. Some programs can end up spending
a fair amount of time allocating and initializing megabytes of memory when
presented with pathological inputs. Low -m values can make them give up sooner
and not waste CPU time.
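
For example (the values are purely illustrative - tune them to the target):

  $ ./afl-fuzz -i in_dir -o out_dir -t 5 -m 100 ./target_binary @@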

8) Check OS configuration
-------------------------

There are several OS-level factors that may affect fuzzing speed:

  - High system load. Use idle machines where possible. Kill any non-essential
    CPU hogs (idle browser windows, media players, complex screensavers, etc).

  - Network filesystems, either used for fuzzer input / output, or accessed by
    the fuzzed binary to read configuration files (pay special attention to the
    home directory - many programs search it for dot-files).

  - On-demand CPU scaling. The Linux 'ondemand' governor performs its analysis
    on a particular schedule and is known to underestimate the needs of
    short-lived processes spawned by afl-fuzz (or any other fuzzer). On Linux,
    this can be fixed with:

      cd /sys/devices/system/cpu
      echo performance | tee cpu*/cpufreq/scaling_governor

    On other systems, the impact of CPU scaling will be different; when fuzzing,
    use OS-specific tools to find out if all cores are running at full speed.

  - Transparent huge pages. Some allocators, such as jemalloc, can incur a
    heavy fuzzing penalty when transparent huge pages (THP) are enabled in the
    kernel. You can disable this via:

      echo never > /sys/kernel/mm/transparent_hugepage/enabled

  - Suboptimal scheduling strategies. The significance of this will vary from
    one target to another, but on Linux, you may want to make sure that the
    following options are set:

      echo 1 >/proc/sys/kernel/sched_child_runs_first
      echo 1 >/proc/sys/kernel/sched_autogroup_enabled

    Setting a different scheduling policy for the fuzzer process - say
    SCHED_RR - can usually speed things up, too, but needs to be done with
    care.

9) If all other options fail, use -d
------------------------------------

For programs that are genuinely slow, in cases where you really can't escape
using huge input files, or when you simply want to get quick and dirty results
early on, you can always resort to the -d mode.

This mode causes afl-fuzz to skip all the deterministic fuzzing steps, which
makes the output a lot less neat and can ultimately make the testing a bit less
in-depth, but it will give you an experience more familiar from other fuzzing
tools.

===============
Sister projects
===============

This doc lists some of the projects that are inspired by, derived from,
designed for, or meant to integrate with AFL. See README for the general
instruction manual.

-------------------------------------------
Support for other languages / environments:
-------------------------------------------

Python AFL (Jakub Wilk)
-----------------------

Allows fuzz-testing of Python programs. Uses custom instrumentation and its
own forkserver.

  http://jwilk.net/software/python-afl

Go-fuzz (Dmitry Vyukov)
-----------------------

AFL-inspired guided fuzzing approach for Go targets:

  https://github.com/dvyukov/go-fuzz

afl.rs (Keegan McAllister)
--------------------------

Allows Rust features to be easily fuzzed with AFL (using the LLVM mode).

  https://github.com/kmcallister/afl.rs

OCaml support (KC Sivaramakrishnan)
-----------------------------------

Adds AFL-compatible instrumentation to OCaml programs.

  https://github.com/ocamllabs/opam-repo-dev/pull/23
  http://canopy.mirage.io/Posts/Fuzzing

AFL for GCJ Java and other GCC frontends (-)
--------------------------------------------

GCC Java programs are actually supported out of the box - simply rename
afl-gcc to afl-gcj. Unfortunately, by default, unhandled exceptions in GCJ do
not result in abort() being called, so you will need to manually add a
top-level exception handler that exits with SIGABRT or something equivalent.

Other GCC-supported languages should be fairly easy to get working, but may
face similar problems. See https://gcc.gnu.org/frontends.html for a list of
options.

AFL-style in-process fuzzer for LLVM (Kostya Serebryany)
--------------------------------------------------------

Provides an evolutionary instrumentation-guided fuzzing harness that allows
some programs to be fuzzed without the fork / execve overhead. (Similar
functionality is now available as the "persistent" feature described in
../llvm_mode/README.llvm.)

  http://llvm.org/docs/LibFuzzer.html

AFL fixup shim (Ben Nagy)
-------------------------

Allows AFL_POST_LIBRARY postprocessors to be written in arbitrary languages
that don't have C / .so bindings. Includes examples in Go.

  https://github.com/bnagy/aflfix

TriforceAFL (Tim Newsham and Jesse Hertz)
-----------------------------------------

Leverages QEMU full system emulation mode to allow AFL to target operating
systems and other alien worlds:

  https://www.nccgroup.trust/us/about-us/newsroom-and-events/blog/2016/june/project-triforce-run-afl-on-everything/

WinAFL (Ivan Fratric)
---------------------

As the name implies, allows you to fuzz Windows binaries (using DynamoRio).

  https://github.com/ivanfratric/winafl

Another Windows alternative may be:

  https://github.com/carlosgprado/BrundleFuzz/

----------------
Network fuzzing:
----------------

Preeny (Yan Shoshitaishvili)
----------------------------

Provides a fairly simple way to convince dynamically linked network-centric
programs to read from a file or not fork. Not AFL-specific, but described as
useful by many users. Some assembly required.

  https://github.com/zardus/preeny

-------------------------------------------
Distributed fuzzing and related automation:
-------------------------------------------

roving (Richo Healey)
---------------------

A client-server architecture for effortlessly orchestrating AFL runs across
a fleet of machines. You don't want to use this on systems that face the
Internet or live in other untrusted environments.

  https://github.com/richo/roving

Distfuzz-AFL (Martijn Bogaard)
------------------------------

Simplifies the management of afl-fuzz instances on remote machines. The
author notes that the current implementation isn't secure and should not
be exposed on the Internet.

  https://github.com/MartijnB/disfuzz-afl

AFLDFF (quantumvm)
------------------

A nice GUI for managing AFL jobs.

  https://github.com/quantumvm/AFLDFF

afl-launch (Ben Nagy)
---------------------

Batch AFL launcher utility with a simple CLI.

  https://github.com/bnagy/afl-launch

AFL Utils (rc0r)
----------------

Simplifies the triage of discovered crashes, starting parallel instances, etc.

  https://github.com/rc0r/afl-utils

Another crash triage tool:

  https://github.com/floyd-fuh/afl-crash-analyzer

afl-fuzzing-scripts (Tobias Ospelt)
-----------------------------------

Simplifies starting up multiple parallel AFL jobs.

  https://github.com/floyd-fuh/afl-fuzzing-scripts/

afl-sid (Jacek Wielemborek)
---------------------------

Allows users to more conveniently build and deploy AFL via Docker.

  https://github.com/d33tah/afl-sid

Another Docker-related project:

  https://github.com/ozzyjohnson/docker-afl

afl-monitor (Paul S. Ziegler)
-----------------------------

Provides more detailed and versatile statistics about your running AFL jobs.

  https://github.com/reflare/afl-monitor

-----------------------------------------------------------
Crash triage, coverage analysis, and other companion tools:
-----------------------------------------------------------

afl-crash-analyzer (Tobias Ospelt)
----------------------------------

Makes it easier to navigate and annotate crashing test cases.

  https://github.com/floyd-fuh/afl-crash-analyzer/

Crashwalk (Ben Nagy)
--------------------

AFL-aware tool to annotate and sort through crashing test cases.

  https://github.com/bnagy/crashwalk

afl-cov (Michael Rash)
----------------------

Produces human-readable coverage data based on the output queue of afl-fuzz.

  https://github.com/mrash/afl-cov

afl-sancov (Bhargava Shastry)
-----------------------------

Similar to afl-cov, but uses clang sanitizer instrumentation.

  https://github.com/bshastry/afl-sancov

RecidiVM (Jakub Wilk)
---------------------

Makes it easy to estimate memory usage limits when fuzzing with ASAN or MSAN.

  http://jwilk.net/software/recidivm

aflize (Jacek Wielemborek)
--------------------------

Automatically builds AFL-enabled versions of Debian packages.

  https://github.com/d33tah/aflize

afl-ddmin-mod (Markus Teufelberger)
-----------------------------------

A variant of afl-tmin that uses a more sophisticated (but slower)
minimization algorithm.

  https://github.com/MarkusTeufelberger/afl-ddmin-mod

afl-kit (Kuang-che Wu)
----------------------

Replacements for afl-cmin and afl-tmin with additional features, such
as the ability to filter crashes based on stderr patterns.

  https://github.com/kcwu/afl-kit

-------------------------------
Narrow-purpose or experimental:
-------------------------------

Cygwin support (Ali Rizvi-Santiago)
-----------------------------------

Pretty self-explanatory. As per the author, this "mostly" ports AFL to
Windows. Field reports welcome!

  https://github.com/arizvisa/afl-cygwin

Pause and resume scripts (Ben Nagy)
-----------------------------------

Simple automation to suspend and resume groups of fuzzing jobs.

  https://github.com/bnagy/afl-trivia

Static binary-only instrumentation (Aleksandar Nikolich)
--------------------------------------------------------

Allows black-box binaries to be instrumented statically (i.e., by modifying
the binary ahead of time, rather than translating it at run time). The author
reports better performance compared to QEMU, but occasional translation
errors with stripped binaries.

  https://github.com/vrtadmin/moflow/tree/master/afl-dyninst

AFL PIN (Parker Thompson)
-------------------------

Early-stage Intel PIN instrumentation support (from before we settled on
faster-running QEMU).

  https://github.com/mothran/aflpin

AFL-style instrumentation in llvm (Kostya Serebryany)
-----------------------------------------------------

Allows AFL-equivalent instrumentation to be injected at the compiler level.
This is currently not supported by AFL as-is, but may be useful in other
projects.

  https://code.google.com/p/address-sanitizer/wiki/AsanCoverage#Coverage_counters

AFL JS (Han Choongwoo)
----------------------

One-off optimizations to speed up the fuzzing of JavaScriptCore (now likely
superseded by LLVM deferred forkserver init - see llvm_mode/README.llvm).

  https://github.com/tunz/afl-fuzz-js

AFL harness for fwknop (Michael Rash)
-------------------------------------

An example of a fairly involved integration with AFL.

  https://github.com/mrash/fwknop/tree/master/test/afl

Building harnesses for DNS servers (Jonathan Foote, Ron Bowes)
--------------------------------------------------------------

Two articles outlining the general principles and showing some example code.

  https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
  https://goo.gl/j9EgFf

Fuzzer shell for SQLite (Richard Hipp)
--------------------------------------

A simple SQL shell designed specifically for fuzzing the underlying library.

  http://www.sqlite.org/src/artifact/9e7e273da2030371

Support for Python mutation modules (Christian Holler)
------------------------------------------------------

  https://github.com/choller/afl/blob/master/docs/mozilla/python_modules.txt

Support for selective instrumentation (Christian Holler)
--------------------------------------------------------

  https://github.com/choller/afl/blob/master/docs/mozilla/partial_instrumentation.txt

Kernel fuzzing (Dmitry Vyukov)
------------------------------

A similar guided approach as applied to fuzzing syscalls:

  https://github.com/google/syzkaller/wiki/Found-Bugs
  https://github.com/dvyukov/linux/commit/33787098ffaaa83b8a7ccf519913ac5fd6125931
  http://events.linuxfoundation.org/sites/events/files/slides/AFL%20filesystem%20fuzzing%2C%20Vault%202016_0.pdf

Android support (ele7enxxh)
---------------------------

Based on a somewhat dated version of AFL:

  https://github.com/ele7enxxh/android-afl

CGI wrapper (floyd)
-------------------

Facilitates the testing of CGI scripts.

  https://github.com/floyd-fuh/afl-cgi-wrapper

Fuzzing difficulty estimation (Marcel Boehme)
---------------------------------------------

A fork of AFL that tries to quantify the likelihood of finding additional
paths or crashes at any point in a fuzzing job.

  https://github.com/mboehme/pythia
@ -0,0 +1,408 @@
|
|||||||
|
===============================
|
||||||
|
Understanding the status screen
|
||||||
|
===============================
|
||||||
|
|
||||||
|
This document provides an overview of the status screen - plus tips for
|
||||||
|
troubleshooting any warnings and red text shown in the UI. See README for
|
||||||
|
the general instruction manual.
|
||||||
|
|
||||||
|
0) A note about colors
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The status screen and error messages use colors to keep things readable and
|
||||||
|
attract your attention to the most important details. For example, red almost
|
||||||
|
always means "consult this doc" :-)
|
||||||
|
|
||||||
|
Unfortunately, the UI will render correctly only if your terminal is using
|
||||||
|
traditional un*x palette (white text on black background) or something close
|
||||||
|
to that.
|
||||||
|
|
||||||
|
If you are using inverse video, you may want to change your settings, say:
|
||||||
|
|
||||||
|
- For GNOME Terminal, go to Edit > Profile preferences, select the "colors"
|
||||||
|
tab, and from the list of built-in schemes, choose "white on black".
|
||||||
|
|
||||||
|
- For the MacOS X Terminal app, open a new window using the "Pro" scheme via
|
||||||
|
the Shell > New Window menu (or make "Pro" your default).
|
||||||
|
|
||||||
|
Alternatively, if you really like your current colors, you can edit config.h
|
||||||
|
to comment out USE_COLORS, then do 'make clean all'.
|
||||||
|
|
||||||
|
I'm not aware of any other simple way to make this work without causing
|
||||||
|
other side effects - sorry about that.
|
||||||
|
|
||||||
|
With that out of the way, let's talk about what's actually on the screen...
|
||||||
|
|
||||||
|
1) Process timing
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
+----------------------------------------------------+
|
||||||
|
| run time : 0 days, 8 hrs, 32 min, 43 sec |
|
||||||
|
| last new path : 0 days, 0 hrs, 6 min, 40 sec |
|
||||||
|
| last uniq crash : none seen yet |
|
||||||
|
| last uniq hang : 0 days, 1 hrs, 24 min, 32 sec |
|
||||||
|
+----------------------------------------------------+
|
||||||
|
|
||||||
|
This section is fairly self-explanatory: it tells you how long the fuzzer has
|
||||||
|
been running and how much time has elapsed since its most recent finds. This is
|
||||||
|
broken down into "paths" (a shorthand for test cases that trigger new execution
|
||||||
|
patterns), crashes, and hangs.
|
||||||
|
|
||||||
|
When it comes to timing: there is no hard rule, but most fuzzing jobs should be
|
||||||
|
expected to run for days or weeks; in fact, for a moderately complex project, the
|
||||||
|
first pass will probably take a day or so. Every now and then, some jobs
|
||||||
|
will be allowed to run for months.
|
||||||
|
|
||||||
|
There's one important thing to watch out for: if the tool is not finding new
|
||||||
|
paths within several minutes of starting, you're probably not invoking the
|
||||||
|
target binary correctly and it never gets to parse the input files we're
|
||||||
|
throwing at it; other possible explanations are that the default memory limit
|
||||||
|
(-m) is too restrictive, and the program exits after failing to allocate a
|
||||||
|
buffer very early on; or that the input files are patently invalid and always
|
||||||
|
fail a basic header check.
|
||||||
|
|
||||||
|
If there are no new paths showing up for a while, you will eventually see a big
|
||||||
|
red warning in this section, too :-)
|
||||||
|
|
||||||
|
2) Overall results
|
||||||
|
------------------
|
||||||
|
|
||||||
|
+-----------------------+
|
||||||
|
| cycles done : 0 |
|
||||||
|
| total paths : 2095 |
|
||||||
|
| uniq crashes : 0 |
|
||||||
|
| uniq hangs : 19 |
|
||||||
|
+-----------------------+
|
||||||
|
|
||||||
|
The first field in this section gives you the count of queue passes done so far
|
||||||
|
- that is, the number of times the fuzzer went over all the interesting test
|
||||||
|
cases discovered so far, fuzzed them, and looped back to the very beginning.
|
||||||
|
Every fuzzing session should be allowed to complete at least one cycle; and
|
||||||
|
ideally, should run much longer than that.
|
||||||
|
|
||||||
|
As noted earlier, the first pass can take a day or longer, so sit back and
|
||||||
|
relax. If you want to get broader but more shallow coverage right away, try
|
||||||
|
the -d option - it gives you a more familiar experience by skipping the
|
||||||
|
deterministic fuzzing steps. It is, however, inferior to the standard mode in
|
||||||
|
a couple of subtle ways.
|
||||||
|
|
||||||
|
To help make the call on when to hit Ctrl-C, the cycle counter is color-coded.
|
||||||
|
It is shown in magenta during the first pass, progresses to yellow if new finds
|
||||||
|
are still being made in subsequent rounds, then blue when that ends - and
|
||||||
|
finally, turns green after the fuzzer hasn't been seeing any action for a
|
||||||
|
longer while.
|
||||||
|
|
||||||
|
The remaining fields in this part of the screen should be pretty obvious:
|
||||||
|
there's the number of test cases ("paths") discovered so far, and the number of
|
||||||
|
unique faults. The test cases, crashes, and hangs can be explored in real-time
|
||||||
|
by browsing the output directory, as discussed in the README.
|
||||||
|
|
||||||
|
3) Cycle progress
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
+-------------------------------------+
|
||||||
|
| now processing : 1296 (61.86%) |
|
||||||
|
| paths timed out : 0 (0.00%) |
|
||||||
|
+-------------------------------------+
|
||||||
|
|
||||||
|
This box tells you how far along the fuzzer is with the current queue cycle: it
|
||||||
|
shows the ID of the test case it is currently working on, plus the number of
|
||||||
|
inputs it decided to ditch because they were persistently timing out.
|
||||||
|
|
||||||
|
The "*" suffix sometimes shown in the first line means that the currently
|
||||||
|
processed path is not "favored" (a property discussed later on, in section 6).
|
||||||
|
|
||||||
|
If you feel that the fuzzer is progressing too slowly, see the note about the
|
||||||
|
-d option in section 2 of this doc.
|
||||||
|
|
||||||
|
4) Map coverage
|
||||||
|
---------------
|
||||||
|
|
||||||
|
+--------------------------------------+
|
||||||
|
| map density : 10.15% / 29.07% |
|
||||||
|
| count coverage : 4.03 bits/tuple |
|
||||||
|
+--------------------------------------+
|
||||||
|
|
||||||
|
The section provides some trivia about the coverage observed by the
|
||||||
|
instrumentation embedded in the target binary.
|
||||||
|
|
||||||
|
The first line in the box tells you how many branch tuples we have already
|
||||||
|
hit, in proportion to how much the bitmap can hold. The number on the left
|
||||||
|
describes the current input; the one on the right is the value for the entire
|
||||||
|
input corpus.
|
||||||
|
|
||||||
|
Be wary of extremes:
|
||||||
|
|
||||||
|
- Absolute numbers below 200 or so suggest one of three things: that the
|
||||||
|
program is extremely simple; that it is not instrumented properly (e.g.,
|
||||||
|
due to being linked against a non-instrumented copy of the target
|
||||||
|
library); or that it is bailing out prematurely on your input test cases.
|
||||||
|
The fuzzer will try to mark this in pink, just to make you aware.
|
||||||
|
|
||||||
|
- Percentages over 70% may very rarely happen with very complex programs
|
||||||
|
that make heavy use of template-generated code.
|
||||||
|
|
||||||
|
Because high bitmap density makes it harder for the fuzzer to reliably
|
||||||
|
discern new program states, I recommend recompiling the binary with
|
||||||
|
AFL_INST_RATIO=10 or so and trying again (see env_variables.txt).
|
||||||
|
|
||||||
|
The fuzzer will flag high percentages in red. Chances are, you will never
|
||||||
|
see that unless you're fuzzing extremely hairy software (say, v8, perl,
|
||||||
|
ffmpeg).
|
||||||
|
|
||||||
|
The other line deals with the variability in tuple hit counts seen in the
|
||||||
|
binary. In essence, if every taken branch is always taken a fixed number of
|
||||||
|
times for all the inputs we have tried, this will read "1.00". As we manage
|
||||||
|
to trigger other hit counts for every branch, the needle will start to move
|
||||||
|
toward "8.00" (every bit in the 8-bit map hit), but will probably never
|
||||||
|
reach that extreme.
|
||||||
|
|
||||||
|
Together, the values can be useful for comparing the coverage of several
|
||||||
|
different fuzzing jobs that rely on the same instrumented binary.
|
||||||
|
|
||||||
|
5) Stage progress
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
+-------------------------------------+
|
||||||
|
| now trying : interest 32/8 |
|
||||||
|
| stage execs : 3996/34.4k (11.62%) |
|
||||||
|
| total execs : 27.4M |
|
||||||
|
| exec speed : 891.7/sec |
|
||||||
|
+-------------------------------------+
|
||||||
|
|
||||||
|
This part gives you an in-depth peek at what the fuzzer is actually doing right
|
||||||
|
now. It tells you about the current stage, which can be any of:
|
||||||
|
|
||||||
|
- calibration - a pre-fuzzing stage where the execution path is examined
|
||||||
|
to detect anomalies, establish baseline execution speed, and so on. Executed
|
||||||
|
very briefly whenever a new find is being made.
|
||||||
|
|
||||||
|
- trim L/S - another pre-fuzzing stage where the test case is trimmed to the
|
||||||
|
shortest form that still produces the same execution path. The length (L)
|
||||||
|
and stepover (S) are chosen in general relationship to file size.
|
||||||
|
|
||||||
|
- bitflip L/S - deterministic bit flips. There are L bits toggled at any given
|
||||||
|
time, walking the input file with S-bit increments. The current L/S variants
|
||||||
|
are: 1/1, 2/1, 4/1, 8/8, 16/8, 32/8.
|
||||||
|
|
||||||
|
- arith L/8 - deterministic arithmetics. The fuzzer tries to subtract or add
|
||||||
|
small integers to 8-, 16-, and 32-bit values. The stepover is always 8 bits.
|
||||||
|
|
||||||
|
- interest L/8 - deterministic value overwrite. The fuzzer has a list of known
|
||||||
|
"interesting" 8-, 16-, and 32-bit values to try. The stepover is 8 bits.
|
||||||
|
|
||||||
|
- extras - deterministic injection of dictionary terms. This can be shown as
|
||||||
|
"user" or "auto", depending on whether the fuzzer is using a user-supplied
|
||||||
|
dictionary (-x) or an auto-created one. You will also see "over" or "insert",
|
||||||
|
depending on whether the dictionary words overwrite existing data or are
|
||||||
|
inserted by offsetting the remaining data to accommodate their length.
|
||||||
|
|
||||||
|
- havoc - a sort-of-fixed-length cycle with stacked random tweaks. The
|
||||||
|
operations attempted during this stage include bit flips, overwrites with
|
||||||
|
random and "interesting" integers, block deletion, block duplication, plus
|
||||||
|
assorted dictionary-related operations (if a dictionary is supplied in the
|
||||||
|
first place).
|
||||||
|
|
||||||
|
- splice - a last-resort strategy that kicks in after the first full queue
|
||||||
|
cycle with no new paths. It is equivalent to 'havoc', except that it first
|
||||||
|
splices together two random inputs from the queue at some arbitrarily
|
||||||
|
selected midpoint.
|
||||||
|
|
||||||
|
- sync - a stage used only when -M or -S is set (see parallel_fuzzing.txt).
|
||||||
|
No real fuzzing is involved, but the tool scans the output from other
|
||||||
|
fuzzers and imports test cases as necessary. The first time this is done,
|
||||||
|
it may take several minutes or so.
|
||||||
|
|
||||||
|
The remaining fields should be fairly self-evident: there's the exec count
|
||||||
|
progress indicator for the current stage, a global exec counter, and a
|
||||||
|
benchmark for the current program execution speed. This may fluctuate from
|
||||||
|
one test case to another, but the benchmark should ideally be over 500 execs/sec
|
||||||
|
most of the time - and if it stays below 100, the job will probably take very
|
||||||
|
long.
|
||||||
|
|
||||||
|
The fuzzer will explicitly warn you about slow targets, too. If this happens,
|
||||||
|
see the perf_tips.txt file included with the fuzzer for ideas on how to speed
|
||||||
|
things up.
|
||||||
|
|
||||||
|
6) Findings in depth
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
+--------------------------------------+
|
||||||
|
| favored paths : 879 (41.96%) |
|
||||||
|
| new edges on : 423 (20.19%) |
|
||||||
|
| total crashes : 0 (0 unique) |
|
||||||
|
| total tmouts : 24 (19 unique) |
|
||||||
|
+--------------------------------------+
|
||||||
|
|
||||||
|
This gives you several metrics that are of interest mostly to complete nerds.
|
||||||
|
The section includes the number of paths that the fuzzer likes the most based
|
||||||
|
on a minimization algorithm baked into the code (these will get considerably
|
||||||
|
more air time), and the number of test cases that actually resulted in better
|
||||||
|
edge coverage (versus just pushing the branch hit counters up). There are also
|
||||||
|
additional, more detailed counters for crashes and timeouts.
|
||||||
|
|
||||||
|
Note that the timeout counter is somewhat different from the hang counter; this
|
||||||
|
one includes all test cases that exceeded the timeout, even if they did not
|
||||||
|
exceed it by a margin sufficient to be classified as hangs.
|
||||||
|
|
||||||
|
7) Fuzzing strategy yields
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
+-----------------------------------------------------+
|
||||||
|
| bit flips : 57/289k, 18/289k, 18/288k |
|
||||||
|
| byte flips : 0/36.2k, 4/35.7k, 7/34.6k |
|
||||||
|
| arithmetics : 53/2.54M, 0/537k, 0/55.2k |
|
||||||
|
| known ints : 8/322k, 12/1.32M, 10/1.70M |
|
||||||
|
| dictionary : 9/52k, 1/53k, 1/24k |
|
||||||
|
| havoc : 1903/20.0M, 0/0 |
|
||||||
|
| trim : 20.31%/9201, 17.05% |
|
||||||
|
+-----------------------------------------------------+
|
||||||
|
|
||||||
|
This is just another nerd-targeted section keeping track of how many paths we
|
||||||
|
have netted, in proportion to the number of execs attempted, for each of the
|
||||||
|
fuzzing strategies discussed earlier on. This serves to convincingly validate
|
||||||
|
assumptions about the usefulness of the various approaches taken by afl-fuzz.
|
||||||
|
|
||||||
|
The trim strategy stats in this section are a bit different from the rest.
|
||||||
|
The first number in this line shows the ratio of bytes removed from the input
|
||||||
|
files; the second one corresponds to the number of execs needed to achieve this
|
||||||
|
goal. Finally, the third number shows the proportion of bytes that, although
|
||||||
|
not possible to remove, were deemed to have no effect and were excluded from
|
||||||
|
some of the more expensive deterministic fuzzing steps.
|
||||||
|
|
||||||
|
8) Path geometry
|
||||||
|
----------------
|
||||||
|
|
||||||
|
+---------------------+
|
||||||
|
| levels : 5 |
|
||||||
|
| pending : 1570 |
|
||||||
|
| pend fav : 583 |
|
||||||
|
| own finds : 0 |
|
||||||
|
| imported : 0 |
|
||||||
|
| stability : 100.00% |
|
||||||
|
+---------------------+
|
||||||
|
|
||||||
|
The first field in this section tracks the path depth reached through the
|
||||||
|
guided fuzzing process. In essence: the initial test cases supplied by the
|
||||||
|
user are considered "level 1". The test cases that can be derived from that
|
||||||
|
through traditional fuzzing are considered "level 2"; the ones derived by
|
||||||
|
using these as inputs to subsequent fuzzing rounds are "level 3"; and so forth.
|
||||||
|
The maximum depth is therefore a rough proxy for how much value you're getting
|
||||||
|
out of the instrumentation-guided approach taken by afl-fuzz.
|
||||||
|
|
||||||
|
The next field shows you the number of inputs that have not gone through any
|
||||||
|
fuzzing yet. The same stat is also given for "favored" entries that the fuzzer
|
||||||
|
really wants to get to in this queue cycle (the non-favored entries may have to
|
||||||
|
wait a couple of cycles to get their chance).
|
||||||
|
|
||||||
|
Next, we have the number of new paths found during this fuzzing session and
|
||||||
|
imported from other fuzzer instances when doing parallelized fuzzing; and the
|
||||||
|
extent to which identical inputs appear to sometimes produce variable behavior
|
||||||
|
in the tested binary.
|
||||||
|
|
||||||
|
That last bit is actually fairly interesting: it measures the consistency of
|
||||||
|
observed traces. If a program always behaves the same for the same input data,
|
||||||
|
it will earn a score of 100%. When the value is lower but still shown in purple,
|
||||||
|
the fuzzing process is unlikely to be negatively affected. If it goes into red,
|
||||||
|
you may be in trouble, since AFL will have difficulty discerning between
|
||||||
|
meaningful and "phantom" effects of tweaking the input file.
|
||||||
|
|
||||||
|
Now, most targets will just get a 100% score, but when you see lower figures,
|
||||||
|
there are several things to look at:
|
||||||
|
|
||||||
|
- The use of uninitialized memory in conjunction with some intrinsic sources
|
||||||
|
of entropy in the tested binary. Harmless to AFL, but could be indicative
|
||||||
|
of a security bug.
|
||||||
|
|
||||||
|
  - Attempts to manipulate persistent resources, such as leftover temporary
|
||||||
|
files or shared memory objects. This is usually harmless, but you may want
|
||||||
|
to double-check to make sure the program isn't bailing out prematurely.
|
||||||
|
Running out of disk space, SHM handles, or other global resources can
|
||||||
|
trigger this, too.
|
||||||
|
|
||||||
|
- Hitting some functionality that is actually designed to behave randomly.
|
||||||
|
Generally harmless. For example, when fuzzing sqlite, an input like
|
||||||
|
'select random();' will trigger a variable execution path.
|
||||||
|
|
||||||
|
- Multiple threads executing at once in semi-random order. This is harmless
|
||||||
|
when the 'stability' metric stays over 90% or so, but can become an issue
|
||||||
|
if not. Here's what to try:
|
||||||
|
|
||||||
|
- Use afl-clang-fast from llvm_mode/ - it uses a thread-local tracking
|
||||||
|
model that is less prone to concurrency issues,
|
||||||
|
|
||||||
|
- See if the target can be compiled or run without threads. Common
|
||||||
|
./configure options include --without-threads, --disable-pthreads, or
|
||||||
|
--disable-openmp.
|
||||||
|
|
||||||
|
- Replace pthreads with GNU Pth (https://www.gnu.org/software/pth/), which
|
||||||
|
allows you to use a deterministic scheduler.
|
||||||
|
|
||||||
|
- In persistent mode, minor drops in the "stability" metric can be normal,
|
||||||
|
because not all the code behaves identically when re-entered; but major
|
||||||
|
dips may signify that the code within __AFL_LOOP() is not behaving
|
||||||
|
correctly on subsequent iterations (e.g., due to incomplete clean-up or
|
||||||
|
reinitialization of the state) and that most of the fuzzing effort goes
|
||||||
|
to waste.
|
||||||
|
|
||||||
|
The paths where variable behavior is detected are marked with a matching entry
|
||||||
|
in the <out_dir>/queue/.state/variable_behavior/ directory, so you can look
|
||||||
|
them up easily.
|
||||||
|
|
||||||
|
9) CPU load
|
||||||
|
-----------
|
||||||
|
|
||||||
|
[cpu: 25%]
|
||||||
|
|
||||||
|
This tiny widget shows the apparent CPU utilization on the local system. It is
|
||||||
|
calculated by taking the number of processes in the "runnable" state, and then
|
||||||
|
comparing it to the number of logical cores on the system.
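
For illustration, here is a minimal C sketch of that kind of calculation -
counting runnable tasks via the procs_running field of /proc/stat and dividing
by the logical core count. This is an approximation for the example, not the
exact code used by afl-fuzz:

  #include <stdio.h>
  #include <unistd.h>

  /* Rough CPU-load estimate: runnable tasks vs. logical cores. */
  int main(void) {

    FILE* f = fopen("/proc/stat", "r");
    char  line[256];
    int   runnable = 0;

    if (!f) return 1;

    while (fgets(line, sizeof(line), f))
      if (sscanf(line, "procs_running %d", &runnable) == 1) break;

    fclose(f);

    long cores = sysconf(_SC_NPROCESSORS_ONLN);

    printf("[cpu: %ld%%]\n", cores > 0 ? runnable * 100L / cores : 0);
    return 0;

  }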
|
||||||
|
|
||||||
|
If the value is shown in green, you are using fewer CPU cores than available on
|
||||||
|
your system and can probably parallelize to improve performance; for tips on
|
||||||
|
how to do that, see parallel_fuzzing.txt.
|
||||||
|
|
||||||
|
If the value is shown in red, your CPU is *possibly* oversubscribed, and
|
||||||
|
running additional fuzzers may not give you any benefits.
|
||||||
|
|
||||||
|
Of course, this benchmark is very simplistic; it tells you how many processes
|
||||||
|
are ready to run, but not how resource-hungry they may be. It also doesn't
|
||||||
|
distinguish between physical cores, logical cores, and virtualized CPUs; the
|
||||||
|
performance characteristics of each of these will differ quite a bit.
|
||||||
|
|
||||||
|
If you want a more accurate measurement, you can run the afl-gotcpu utility
|
||||||
|
from the command line.
|
||||||
|
|
||||||
|
10) Addendum: status and plot files
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
For unattended operation, some of the key status screen information can also be
|
||||||
|
found in a machine-readable format in the fuzzer_stats file in the output
|
||||||
|
directory. This includes:
|
||||||
|
|
||||||
|
- start_time - unix time indicating the start time of afl-fuzz
|
||||||
|
- last_update - unix time corresponding to the last update of this file
|
||||||
|
- fuzzer_pid - PID of the fuzzer process
|
||||||
|
- cycles_done - queue cycles completed so far
|
||||||
|
- execs_done - number of execve() calls attempted
|
||||||
|
- execs_per_sec - current number of execs per second
|
||||||
|
- paths_total - total number of entries in the queue
|
||||||
|
- paths_found - number of entries discovered through local fuzzing
|
||||||
|
- paths_imported - number of entries imported from other instances
|
||||||
|
- max_depth - number of levels in the generated data set
|
||||||
|
- cur_path - currently processed entry number
|
||||||
|
- pending_favs - number of favored entries still waiting to be fuzzed
|
||||||
|
- pending_total - number of all entries waiting to be fuzzed
|
||||||
|
- stability - percentage of bitmap bytes that behave consistently
|
||||||
|
- variable_paths - number of test cases showing variable behavior
|
||||||
|
- unique_crashes - number of unique crashes recorded
|
||||||
|
- unique_hangs - number of unique hangs encountered
|
||||||
|
- command_line - full command line used for the fuzzing session
|
||||||
|
  - slowest_exec_ms - real time of the slowest execution in ms
|
||||||
|
- peak_rss_mb - max rss usage reached during fuzzing in mb
|
||||||
|
|
||||||
|
Most of these map directly to the UI elements discussed earlier on.
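
Because the file is just a series of "key : value" lines, it is easy to consume
from scripts or monitoring tools. Here is a minimal C sketch of pulling out one
integer field; the path and helper name are made up for the example:

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  /* Read one integer field from <out_dir>/fuzzer_stats; illustrative only. */
  static long read_stat(const char* path, const char* key) {

    FILE* f = fopen(path, "r");
    char  line[512];
    long  val = -1;

    if (!f) return -1;

    while (fgets(line, sizeof(line), f)) {

      if (!strncmp(line, key, strlen(key))) {
        char* colon = strchr(line, ':');
        if (colon) val = atol(colon + 1);
        break;
      }

    }

    fclose(f);
    return val;

  }

A monitor could then call, say, read_stat("out/fuzzer_stats", "unique_crashes")
and alert when the value becomes non-zero; floating-point fields such as
execs_per_sec would need strtod() instead of atol().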
|
||||||
|
|
||||||
|
On top of that, you can also find an entry called 'plot_data', containing a
|
||||||
|
plottable history for most of these fields. If you have gnuplot installed, you
|
||||||
|
can turn this into a nice progress report with the included 'afl-plot' tool.
|
@ -0,0 +1,563 @@
|
|||||||
|
===================================
|
||||||
|
Technical "whitepaper" for afl-fuzz
|
||||||
|
===================================
|
||||||
|
|
||||||
|
This document provides a quick overview of the guts of American Fuzzy Lop.
|
||||||
|
See README for the general instruction manual; and for a discussion of
|
||||||
|
motivations and design goals behind AFL, see historical_notes.txt.
|
||||||
|
|
||||||
|
0) Design statement
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
American Fuzzy Lop does its best not to focus on any singular principle of
|
||||||
|
operation and not be a proof-of-concept for any specific theory. The tool can
|
||||||
|
be thought of as a collection of hacks that have been tested in practice,
|
||||||
|
found to be surprisingly effective, and have been implemented in the simplest,
|
||||||
|
most robust way I could think of at the time.
|
||||||
|
|
||||||
|
Many of the resulting features are made possible thanks to the availability of
|
||||||
|
lightweight instrumentation that served as a foundation for the tool, but this
|
||||||
|
mechanism should be thought of merely as a means to an end. The only true
|
||||||
|
governing principles are speed, reliability, and ease of use.
|
||||||
|
|
||||||
|
1) Coverage measurements
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
The instrumentation injected into compiled programs captures branch (edge)
|
||||||
|
coverage, along with coarse branch-taken hit counts. The code injected at
|
||||||
|
branch points is essentially equivalent to:
|
||||||
|
|
||||||
|
cur_location = <COMPILE_TIME_RANDOM>;
|
||||||
|
shared_mem[cur_location ^ prev_location]++;
|
||||||
|
prev_location = cur_location >> 1;
|
||||||
|
|
||||||
|
The cur_location value is generated randomly to simplify the process of
|
||||||
|
linking complex projects and keep the XOR output distributed uniformly.
|
||||||
|
|
||||||
|
The shared_mem[] array is a 64 kB SHM region passed to the instrumented binary
|
||||||
|
by the caller. Every byte set in the output map can be thought of as a hit for
|
||||||
|
a particular (branch_src, branch_dst) tuple in the instrumented code.
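
As a rough illustration of the mechanism (not a copy of the actual
implementation), such a region can be set up with SysV shared memory - created
by the fuzzer and attached by the injected runtime, with its ID handed over
through an environment variable. The variable name below is a placeholder:

  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/ipc.h>
  #include <sys/shm.h>

  #define MAP_SIZE (1 << 16)            /* 64 kB coverage map */

  /* Fuzzer side: create the SHM segment and export its ID. */
  int setup_shm(void) {

    int  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
    char buf[32];

    if (shm_id < 0) { perror("shmget"); exit(1); }

    snprintf(buf, sizeof(buf), "%d", shm_id);
    setenv("__EXAMPLE_SHM_ID", buf, 1);  /* placeholder variable name */

    return shm_id;

  }

  /* Target side (runtime): attach to the map exported by the fuzzer. */
  unsigned char* attach_shm(void) {

    char* id_str = getenv("__EXAMPLE_SHM_ID");
    if (!id_str) return NULL;            /* running outside the fuzzer */

    unsigned char* map = (unsigned char*)shmat(atoi(id_str), NULL, 0);
    return map == (void*)-1 ? NULL : map;

  }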
|
||||||
|
|
||||||
|
The size of the map is chosen so that collisions are sporadic with almost all
|
||||||
|
of the intended targets, which usually sport between 2k and 10k discoverable
|
||||||
|
branch points:
|
||||||
|
|
||||||
|
Branch cnt | Colliding tuples | Example targets
|
||||||
|
------------+------------------+-----------------
|
||||||
|
1,000 | 0.75% | giflib, lzo
|
||||||
|
2,000 | 1.5% | zlib, tar, xz
|
||||||
|
5,000 | 3.5% | libpng, libwebp
|
||||||
|
10,000 | 7% | libxml
|
||||||
|
20,000 | 14% | sqlite
|
||||||
|
50,000 | 30% | -
|
||||||
|
|
||||||
|
At the same time, its size is small enough to allow the map to be analyzed
|
||||||
|
in a matter of microseconds on the receiving end, and to effortlessly fit
|
||||||
|
within L2 cache.
|
||||||
|
|
||||||
|
This form of coverage provides considerably more insight into the execution
|
||||||
|
path of the program than simple block coverage. In particular, it trivially
|
||||||
|
distinguishes between the following execution traces:
|
||||||
|
|
||||||
|
A -> B -> C -> D -> E (tuples: AB, BC, CD, DE)
|
||||||
|
A -> B -> D -> C -> E (tuples: AB, BD, DC, CE)
|
||||||
|
|
||||||
|
This aids the discovery of subtle fault conditions in the underlying code,
|
||||||
|
because security vulnerabilities are more often associated with unexpected
|
||||||
|
or incorrect state transitions than with merely reaching a new basic block.
|
||||||
|
|
||||||
|
The reason for the shift operation in the last line of the pseudocode shown
|
||||||
|
earlier in this section is to preserve the directionality of tuples (without
|
||||||
|
this, A ^ B would be indistinguishable from B ^ A) and to retain the identity
|
||||||
|
of tight loops (otherwise, A ^ A would be obviously equal to B ^ B).
|
||||||
|
|
||||||
|
The absence of simple saturating arithmetic opcodes on Intel CPUs means that
|
||||||
|
the hit counters can sometimes wrap around to zero. Since this is a fairly
|
||||||
|
unlikely and localized event, it's seen as an acceptable performance trade-off.
|
||||||
|
|
||||||
|
2) Detecting new behaviors
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
The fuzzer maintains a global map of tuples seen in previous executions; this
|
||||||
|
data can be rapidly compared with individual traces and updated in just a couple
|
||||||
|
of dword- or qword-wide instructions and a simple loop.
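
A simplified sketch of such a loop - comparing the current trace against a
"virgin" map of not-yet-seen bits one 64-bit word at a time; the real
implementation is more involved, but the idea is the same:

  #include <stddef.h>
  #include <stdint.h>

  #define MAP_SIZE (1 << 16)

  /* Returns 1 if trace_bits contains bits still marked as unseen in
     virgin_map, and clears them so they won't count as new next time. */
  static int has_new_bits(const uint8_t* trace_bits, uint8_t* virgin_map) {

    const uint64_t* cur = (const uint64_t*)trace_bits;
    uint64_t*       vir = (uint64_t*)virgin_map;
    int    ret = 0;
    size_t i;

    for (i = 0; i < MAP_SIZE / 8; i++) {

      if (cur[i] & vir[i]) {       /* some tuple or hit count is new here */
        vir[i] &= ~cur[i];
        ret = 1;
      }

    }

    return ret;

  }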
|
||||||
|
|
||||||
|
When a mutated input produces an execution trace containing new tuples, the
|
||||||
|
corresponding input file is preserved and routed for additional processing
|
||||||
|
later on (see section #3). Inputs that do not trigger new local-scale state
|
||||||
|
transitions in the execution trace (i.e., produce no new tuples) are discarded,
|
||||||
|
even if their overall control flow sequence is unique.
|
||||||
|
|
||||||
|
This approach allows for a very fine-grained and long-term exploration of
|
||||||
|
program state while not having to perform any computationally intensive and
|
||||||
|
fragile global comparisons of complex execution traces, and while avoiding the
|
||||||
|
scourge of path explosion.
|
||||||
|
|
||||||
|
To illustrate the properties of the algorithm, consider that the second trace
|
||||||
|
shown below would be considered substantially new because of the presence of
|
||||||
|
new tuples (CA, AE):
|
||||||
|
|
||||||
|
#1: A -> B -> C -> D -> E
|
||||||
|
#2: A -> B -> C -> A -> E
|
||||||
|
|
||||||
|
At the same time, with #2 processed, the following pattern will not be seen
|
||||||
|
as unique, despite having a markedly different overall execution path:
|
||||||
|
|
||||||
|
#3: A -> B -> C -> A -> B -> C -> A -> B -> C -> D -> E
|
||||||
|
|
||||||
|
In addition to detecting new tuples, the fuzzer also considers coarse tuple
|
||||||
|
hit counts. These are divided into several buckets:
|
||||||
|
|
||||||
|
1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+
|
||||||
|
|
||||||
|
To some extent, the number of buckets is an implementation artifact: it allows
|
||||||
|
an in-place mapping of an 8-bit counter generated by the instrumentation to
|
||||||
|
an 8-position bitmap relied on by the fuzzer executable to keep track of the
|
||||||
|
already-seen execution counts for each tuple.
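
In code, this naturally takes the shape of a 256-entry lookup table applied to
every byte of the trace map after each run. A sketch using the bucket
boundaries listed above (the real classifier can apply the same table with
wider, multi-byte lookups for speed):

  #include <stddef.h>
  #include <stdint.h>

  /* Map a raw 8-bit hit count to its one-hot bucket bit. */
  static uint8_t bucket_of(uint8_t cnt) {

    if (!cnt)       return 0;
    if (cnt == 1)   return 1;      /* bucket 1      */
    if (cnt == 2)   return 2;      /* bucket 2      */
    if (cnt == 3)   return 4;      /* bucket 3      */
    if (cnt <= 7)   return 8;      /* bucket 4-7    */
    if (cnt <= 15)  return 16;     /* bucket 8-15   */
    if (cnt <= 31)  return 32;     /* bucket 16-31  */
    if (cnt <= 127) return 64;     /* bucket 32-127 */
    return 128;                    /* bucket 128+   */

  }

  static uint8_t count_class[256];

  static void init_count_class(void) {
    int i;
    for (i = 0; i < 256; i++) count_class[i] = bucket_of((uint8_t)i);
  }

  /* Applied in place to every byte of the trace map. */
  static void classify_counts(uint8_t* trace_bits, size_t len) {
    size_t i;
    for (i = 0; i < len; i++) trace_bits[i] = count_class[trace_bits[i]];
  }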
|
||||||
|
|
||||||
|
Changes within the range of a single bucket are ignored; transition from one
|
||||||
|
bucket to another is flagged as an interesting change in program control flow,
|
||||||
|
and is routed to the evolutionary process outlined in the section below.
|
||||||
|
|
||||||
|
The hit count behavior provides a way to distinguish between potentially
|
||||||
|
interesting control flow changes, such as a block of code being executed
|
||||||
|
twice when it was normally hit only once. At the same time, it is fairly
|
||||||
|
insensitive to empirically less notable changes, such as a loop going from
|
||||||
|
47 cycles to 48. The counters also provide some degree of "accidental"
|
||||||
|
immunity against tuple collisions in dense trace maps.
|
||||||
|
|
||||||
|
The execution is policed fairly heavily through memory and execution time
|
||||||
|
limits; by default, the timeout is set at 5x the initially-calibrated
|
||||||
|
execution speed, rounded up to 20 ms. The aggressive timeouts are meant to
|
||||||
|
prevent dramatic fuzzer performance degradation by descending into tarpits
|
||||||
|
that, say, improve coverage by 1% while being 100x slower; we pragmatically
|
||||||
|
reject them and hope that the fuzzer will find a less expensive way to reach
|
||||||
|
the same code. Empirical testing strongly suggests that more generous time
|
||||||
|
limits are not worth the cost.
|
||||||
|
|
||||||
|
3) Evolving the input queue
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
Mutated test cases that produced new state transitions within the program are
|
||||||
|
added to the input queue and used as a starting point for future rounds of
|
||||||
|
fuzzing. They supplement, but do not automatically replace, existing finds.
|
||||||
|
|
||||||
|
In contrast to more greedy genetic algorithms, this approach allows the tool
|
||||||
|
to progressively explore various disjoint and possibly mutually incompatible
|
||||||
|
features of the underlying data format, as shown in this image:
|
||||||
|
|
||||||
|
http://lcamtuf.coredump.cx/afl/afl_gzip.png
|
||||||
|
|
||||||
|
Several practical examples of the results of this algorithm are discussed
|
||||||
|
here:
|
||||||
|
|
||||||
|
http://lcamtuf.blogspot.com/2014/11/pulling-jpegs-out-of-thin-air.html
|
||||||
|
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-nobody-expects-cdata-sections.html
|
||||||
|
|
||||||
|
The synthetic corpus produced by this process is essentially a compact
|
||||||
|
collection of "hmm, this does something new!" input files, and can be used to
|
||||||
|
seed any other testing processes down the line (for example, to manually
|
||||||
|
stress-test resource-intensive desktop apps).
|
||||||
|
|
||||||
|
With this approach, the queue for most targets grows to somewhere between 1k
|
||||||
|
and 10k entries; approximately 10-30% of this is attributable to the discovery
|
||||||
|
of new tuples, and the remainder is associated with changes in hit counts.
|
||||||
|
|
||||||
|
The following table compares the relative ability to discover file syntax and
|
||||||
|
explore program states when using several different approaches to guided
|
||||||
|
fuzzing. The instrumented target was GNU patch 2.7.3 compiled with -O3 and
|
||||||
|
seeded with a dummy text file; the session consisted of a single pass over the
|
||||||
|
input queue with afl-fuzz:
|
||||||
|
|
||||||
|
Fuzzer guidance | Blocks | Edges | Edge hit | Highest-coverage
|
||||||
|
strategy used | reached | reached | cnt var | test case generated
|
||||||
|
------------------+---------+---------+----------+---------------------------
|
||||||
|
(Initial file) | 156 | 163 | 1.00 | (none)
|
||||||
|
| | | |
|
||||||
|
Blind fuzzing S | 182 | 205 | 2.23 | First 2 B of RCS diff
|
||||||
|
Blind fuzzing L | 228 | 265 | 2.23 | First 4 B of -c mode diff
|
||||||
|
Block coverage | 855 | 1,130 | 1.57 | Almost-valid RCS diff
|
||||||
|
Edge coverage | 1,452 | 2,070 | 2.18 | One-chunk -c mode diff
|
||||||
|
AFL model | 1,765 | 2,597 | 4.99 | Four-chunk -c mode diff
|
||||||
|
|
||||||
|
The first entry for blind fuzzing ("S") corresponds to executing just a single
|
||||||
|
round of testing; the second set of figures ("L") shows the fuzzer running in a
|
||||||
|
loop for a number of execution cycles comparable with that of the instrumented
|
||||||
|
runs, which required more time to fully process the growing queue.
|
||||||
|
|
||||||
|
Roughly similar results have been obtained in a separate experiment where the
|
||||||
|
fuzzer was modified to compile out all the random fuzzing stages and leave just
|
||||||
|
a series of rudimentary, sequential operations such as walking bit flips.
|
||||||
|
Because this mode would be incapable of altering the size of the input file,
|
||||||
|
the sessions were seeded with a valid unified diff:
|
||||||
|
|
||||||
|
Queue extension | Blocks | Edges | Edge hit | Number of unique
|
||||||
|
strategy used | reached | reached | cnt var | crashes found
|
||||||
|
------------------+---------+---------+----------+------------------
|
||||||
|
(Initial file) | 624 | 717 | 1.00 | -
|
||||||
|
| | | |
|
||||||
|
Blind fuzzing | 1,101 | 1,409 | 1.60 | 0
|
||||||
|
Block coverage | 1,255 | 1,649 | 1.48 | 0
|
||||||
|
Edge coverage | 1,259 | 1,734 | 1.72 | 0
|
||||||
|
AFL model | 1,452 | 2,040 | 3.16 | 1
|
||||||
|
|
||||||
|
As noted earlier on, some of the prior work on genetic fuzzing relied on
|
||||||
|
maintaining a single test case and evolving it to maximize coverage. At least
|
||||||
|
in the tests described above, this "greedy" approach appears to confer no
|
||||||
|
substantial benefits over blind fuzzing strategies.
|
||||||
|
|
||||||
|
4) Culling the corpus
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The progressive state exploration approach outlined above means that some of
|
||||||
|
the test cases synthesized later on in the game may have edge coverage that
|
||||||
|
is a strict superset of the coverage provided by their ancestors.
|
||||||
|
|
||||||
|
To optimize the fuzzing effort, AFL periodically re-evaluates the queue using a
|
||||||
|
fast algorithm that selects a smaller subset of test cases that still cover
|
||||||
|
every tuple seen so far, and whose characteristics make them particularly
|
||||||
|
favorable to the tool.
|
||||||
|
|
||||||
|
The algorithm works by assigning every queue entry a score proportional to its
|
||||||
|
execution latency and file size; and then selecting lowest-scoring candidates
|
||||||
|
for each tuple.
|
||||||
|
|
||||||
|
The tuples are then processed sequentially using a simple workflow:
|
||||||
|
|
||||||
|
1) Find next tuple not yet in the temporary working set,
|
||||||
|
|
||||||
|
2) Locate the winning queue entry for this tuple,
|
||||||
|
|
||||||
|
3) Register *all* tuples present in that entry's trace in the working set,
|
||||||
|
|
||||||
|
4) Go to #1 if there are any missing tuples in the set.
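
A compact C sketch of that workflow, assuming top_rated[] holds the
best-scoring (fastest and smallest) entry for each tuple and every queue entry
carries its bucketed trace; all names are illustrative:

  #include <stdint.h>
  #include <string.h>

  #define MAP_SIZE (1 << 16)

  struct queue_entry {
    uint8_t* trace;                 /* bucketed trace map for this input */
    int      favored;
  };

  /* top_rated[i]: lowest-scoring entry known to hit tuple i. */
  extern struct queue_entry* top_rated[MAP_SIZE];

  static void cull_queue(void) {

    static uint8_t temp_set[MAP_SIZE];   /* tuples already covered */
    uint32_t i, j;

    memset(temp_set, 0, sizeof(temp_set));

    for (i = 0; i < MAP_SIZE; i++) {

      if (temp_set[i] || !top_rated[i]) continue;  /* covered or unseen */

      struct queue_entry* q = top_rated[i];
      q->favored = 1;

      /* Register *all* tuples present in this entry's trace. */
      for (j = 0; j < MAP_SIZE; j++)
        if (q->trace[j]) temp_set[j] = 1;

    }

  }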
|
||||||
|
|
||||||
|
The generated corpus of "favored" entries is usually 5-10x smaller than the
|
||||||
|
starting data set. Non-favored entries are not discarded, but they are skipped
|
||||||
|
with varying probabilities when encountered in the queue:
|
||||||
|
|
||||||
|
- If there are new, yet-to-be-fuzzed favorites present in the queue, 99%
|
||||||
|
of non-favored entries will be skipped to get to the favored ones.
|
||||||
|
|
||||||
|
- If there are no new favorites:
|
||||||
|
|
||||||
|
- If the current non-favored entry was fuzzed before, it will be skipped
|
||||||
|
95% of the time.
|
||||||
|
|
||||||
|
- If it hasn't gone through any fuzzing rounds yet, the odds of skipping
|
||||||
|
drop down to 75%.
|
||||||
|
|
||||||
|
Based on empirical testing, this provides a reasonable balance between queue
|
||||||
|
cycling speed and test case diversity.
|
||||||
|
|
||||||
|
Slightly more sophisticated but much slower culling can be performed on input
|
||||||
|
or output corpora with afl-cmin. This tool permanently discards the redundant
|
||||||
|
entries and produces a smaller corpus suitable for use with afl-fuzz or
|
||||||
|
external tools.
|
||||||
|
|
||||||
|
5) Trimming input files
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
File size has a dramatic impact on fuzzing performance, both because large
|
||||||
|
files make the target binary slower, and because they reduce the likelihood
|
||||||
|
that a mutation would touch important format control structures, rather than
|
||||||
|
redundant data blocks. This is discussed in more detail in perf_tips.txt.
|
||||||
|
|
||||||
|
The possibility that the user will provide a low-quality starting corpus aside,
|
||||||
|
some types of mutations can have the effect of iteratively increasing the size
|
||||||
|
of the generated files, so it is important to counter this trend.
|
||||||
|
|
||||||
|
Luckily, the instrumentation feedback provides a simple way to automatically
|
||||||
|
trim down input files while ensuring that the changes made to the files have no
|
||||||
|
impact on the execution path.
|
||||||
|
|
||||||
|
The built-in trimmer in afl-fuzz attempts to sequentially remove blocks of data
|
||||||
|
with variable length and stepover; any deletion that doesn't affect the checksum
|
||||||
|
of the trace map is committed to disk. The trimmer is not designed to be
|
||||||
|
particularly thorough; instead, it tries to strike a balance between precision
|
||||||
|
and the number of execve() calls spent on the process, selecting the block size
|
||||||
|
and stepover to match. The average per-file gains are around 5-20%.
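
The core of such a trimmer can be expressed in a few lines. In the sketch
below, run_target() is a hypothetical helper that executes the target on a
buffer and returns a checksum of the resulting trace map; the real trimmer
chooses its block sizes and stepovers differently, but the accept/reject logic
follows the same idea:

  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>

  extern uint32_t run_target(const uint8_t* buf, size_t len);

  /* Remove blocks whose deletion leaves the execution path unchanged.
     Returns the new length of buf. */
  static size_t trim_case(uint8_t* buf, size_t len) {

    uint32_t orig_cksum = run_target(buf, len);
    uint8_t* tmp = malloc(len);
    size_t   block;

    if (!tmp) return len;

    for (block = len / 16; block >= 4; block /= 2) {

      size_t pos = 0;

      while (pos + block <= len) {

        /* Build a copy with buf[pos .. pos+block) removed. */
        memcpy(tmp, buf, pos);
        memcpy(tmp + pos, buf + pos + block, len - pos - block);

        if (run_target(tmp, len - block) == orig_cksum) {

          /* Same trace checksum: commit the deletion. */
          memcpy(buf, tmp, len - block);
          len -= block;

        } else pos += block;

      }

    }

    free(tmp);
    return len;

  }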
|
||||||
|
|
||||||
|
The standalone afl-tmin tool uses a more exhaustive, iterative algorithm, and
|
||||||
|
also attempts to perform alphabet normalization on the trimmed files. The
|
||||||
|
operation of afl-tmin is as follows.
|
||||||
|
|
||||||
|
First, the tool automatically selects the operating mode. If the initial input
|
||||||
|
crashes the target binary, afl-tmin will run in non-instrumented mode, simply
|
||||||
|
keeping any tweaks that produce a simpler file but still crash the target. If
|
||||||
|
the target is non-crashing, the tool uses an instrumented mode and keeps only
|
||||||
|
the tweaks that produce exactly the same execution path.
|
||||||
|
|
||||||
|
The actual minimization algorithm is:
|
||||||
|
|
||||||
|
1) Attempt to zero large blocks of data with large stepovers. Empirically,
|
||||||
|
this is shown to reduce the number of execs by preempting finer-grained
|
||||||
|
efforts later on.
|
||||||
|
|
||||||
|
2) Perform a block deletion pass with decreasing block sizes and stepovers,
|
||||||
|
binary-search-style.
|
||||||
|
|
||||||
|
3) Perform alphabet normalization by counting unique characters and trying
|
||||||
|
to bulk-replace each with a zero value.
|
||||||
|
|
||||||
|
  4) As a last resort, perform byte-by-byte normalization on non-zero bytes.
|
||||||
|
|
||||||
|
Instead of zeroing with a 0x00 byte, afl-tmin uses the ASCII digit '0'. This
|
||||||
|
is done because such a modification is much less likely to interfere with
|
||||||
|
text parsing, so it is more likely to result in successful minimization of
|
||||||
|
text files.
|
||||||
|
|
||||||
|
The algorithm used here is less involved than some other test case
|
||||||
|
minimization approaches proposed in academic work, but requires far fewer
|
||||||
|
executions and tends to produce comparable results in most real-world
|
||||||
|
applications.
|
||||||
|
|
||||||
|
6) Fuzzing strategies
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The feedback provided by the instrumentation makes it easy to understand the
|
||||||
|
value of various fuzzing strategies and optimize their parameters so that they
|
||||||
|
work equally well across a wide range of file types. The strategies used by
|
||||||
|
afl-fuzz are generally format-agnostic and are discussed in more detail here:
|
||||||
|
|
||||||
|
http://lcamtuf.blogspot.com/2014/08/binary-fuzzing-strategies-what-works.html
|
||||||
|
|
||||||
|
It is somewhat notable that especially early on, most of the work done by
|
||||||
|
afl-fuzz is actually highly deterministic, and progresses to random stacked
|
||||||
|
modifications and test case splicing only at a later stage. The deterministic
|
||||||
|
strategies include:
|
||||||
|
|
||||||
|
- Sequential bit flips with varying lengths and stepovers,
|
||||||
|
|
||||||
|
- Sequential addition and subtraction of small integers,
|
||||||
|
|
||||||
|
- Sequential insertion of known interesting integers (0, 1, INT_MAX, etc),
|
||||||
|
|
||||||
|
The purpose of opening with deterministic steps is related to their tendency to
|
||||||
|
produce compact test cases and small diffs between the non-crashing and crashing
|
||||||
|
inputs.
|
||||||
|
|
||||||
|
With deterministic fuzzing out of the way, the non-deterministic steps include
|
||||||
|
stacked bit flips, insertions, deletions, arithmetics, and splicing of different
|
||||||
|
test cases.
|
||||||
|
|
||||||
|
The relative yields and execve() costs of all these strategies have been
|
||||||
|
investigated and are discussed in the aforementioned blog post.
|
||||||
|
|
||||||
|
For the reasons discussed in historical_notes.txt (chiefly, performance,
|
||||||
|
simplicity, and reliability), AFL generally does not try to reason about the
|
||||||
|
relationship between specific mutations and program states; the fuzzing steps
|
||||||
|
are nominally blind, and are guided only by the evolutionary design of the
|
||||||
|
input queue.
|
||||||
|
|
||||||
|
That said, there is one (trivial) exception to this rule: when a new queue
|
||||||
|
entry goes through the initial set of deterministic fuzzing steps, and tweaks to
|
||||||
|
some regions in the file are observed to have no effect on the checksum of the
|
||||||
|
execution path, they may be excluded from the remaining phases of
|
||||||
|
deterministic fuzzing - and the fuzzer may proceed straight to random tweaks.
|
||||||
|
Especially for verbose, human-readable data formats, this can reduce the number
|
||||||
|
of execs by 10-40% or so without an appreciable drop in coverage. In extreme
|
||||||
|
cases, such as normally block-aligned tar archives, the gains can be as high as
|
||||||
|
90%.
|
||||||
|
|
||||||
|
Because the underlying "effector maps" are local to every queue entry and remain
|
||||||
|
in force only during deterministic stages that do not alter the size or the
|
||||||
|
general layout of the underlying file, this mechanism appears to work very
|
||||||
|
reliably and proved to be simple to implement.
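
Conceptually, the effector map is just a per-byte flag array filled in while
the walking byte flips are performed; a sketch, again using a hypothetical
run_target() helper that returns a trace checksum:

  #include <stdint.h>
  #include <string.h>

  extern uint32_t run_target(const uint8_t* buf, size_t len);

  /* eff[i] = 1 if flipping byte i changes the execution path; bytes with
     eff[i] = 0 can be skipped by later deterministic stages. */
  static void build_effector_map(uint8_t* buf, size_t len, uint8_t* eff) {

    uint32_t orig_cksum = run_target(buf, len);
    size_t i;

    memset(eff, 0, len);

    for (i = 0; i < len; i++) {

      buf[i] ^= 0xFF;                       /* flip the whole byte */
      if (run_target(buf, len) != orig_cksum) eff[i] = 1;
      buf[i] ^= 0xFF;                       /* restore             */

    }

  }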
|
||||||
|
|
||||||
|
7) Dictionaries
|
||||||
|
---------------
|
||||||
|
|
||||||
|
The feedback provided by the instrumentation makes it easy to automatically
|
||||||
|
identify syntax tokens in some types of input files, and to detect that certain
|
||||||
|
combinations of predefined or auto-detected dictionary terms constitute a
|
||||||
|
valid grammar for the tested parser.
|
||||||
|
|
||||||
|
A discussion of how these features are implemented within afl-fuzz can be found
|
||||||
|
here:
|
||||||
|
|
||||||
|
http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html
|
||||||
|
|
||||||
|
In essence, when basic, typically easily-obtained syntax tokens are combined
|
||||||
|
together in a purely random manner, the instrumentation and the evolutionary
|
||||||
|
design of the queue together provide a feedback mechanism to differentiate
|
||||||
|
between meaningless mutations and ones that trigger new behaviors in the
|
||||||
|
instrumented code - and to incrementally build more complex syntax on top of
|
||||||
|
this discovery.
|
||||||
|
|
||||||
|
The dictionaries have been shown to enable the fuzzer to rapidly reconstruct
|
||||||
|
the grammar of highly verbose and complex languages such as JavaScript, SQL,
|
||||||
|
or XML; several examples of generated SQL statements are given in the blog
|
||||||
|
post mentioned above.
|
||||||
|
|
||||||
|
Interestingly, the AFL instrumentation also allows the fuzzer to automatically
|
||||||
|
isolate syntax tokens already present in an input file. It can do so by looking
|
||||||
|
for runs of bytes that, when flipped, produce a consistent change to the
|
||||||
|
program's execution path; this is suggestive of an underlying atomic comparison
|
||||||
|
to a predefined value baked into the code. The fuzzer relies on this signal
|
||||||
|
to build compact "auto dictionaries" that are then used in conjunction with
|
||||||
|
other fuzzing strategies.
|
||||||
|
|
||||||
|
8) De-duping crashes
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
De-duplication of crashes is one of the more important problems for any
|
||||||
|
competent fuzzing tool. Many of the naive approaches run into problems; in
|
||||||
|
particular, looking just at the faulting address may lead to completely
|
||||||
|
unrelated issues being clustered together if the fault happens in a common
|
||||||
|
library function (say, strcmp, strcpy); while checksumming call stack
|
||||||
|
backtraces can lead to extreme crash count inflation if the fault can be
|
||||||
|
reached through a number of different, possibly recursive code paths.
|
||||||
|
|
||||||
|
The solution implemented in afl-fuzz considers a crash unique if either of two
|
||||||
|
conditions is met:
|
||||||
|
|
||||||
|
- The crash trace includes a tuple not seen in any of the previous crashes,
|
||||||
|
|
||||||
|
- The crash trace is missing a tuple that was always present in earlier
|
||||||
|
faults.
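
One way to express those two conditions is to keep the union and the
intersection of all crash traces seen so far and compare each new trace
against them; a simplified sketch, not the actual afl-fuzz bookkeeping:

  #include <stdint.h>
  #include <string.h>

  #define MAP_SIZE (1 << 16)

  static uint8_t seen_union[MAP_SIZE];    /* tuples seen in ANY crash  */
  static uint8_t seen_inter[MAP_SIZE];    /* tuples seen in ALL crashes */
  static int     crash_count;

  /* Returns 1 if this crash trace should be treated as unique. */
  static int crash_is_unique(const uint8_t* trace) {

    int    is_new = 0;
    size_t i;

    if (!crash_count) is_new = 1;         /* very first crash */

    for (i = 0; i < MAP_SIZE && !is_new; i++) {

      if (trace[i] && !seen_union[i]) is_new = 1;  /* brand new tuple      */
      if (!trace[i] && seen_inter[i]) is_new = 1;  /* always-present tuple
                                                      is now missing       */
    }

    /* Update the running union / intersection. */
    for (i = 0; i < MAP_SIZE; i++) {
      if (trace[i]) seen_union[i] = 1;
      if (!crash_count) seen_inter[i] = trace[i] ? 1 : 0;
      else if (!trace[i]) seen_inter[i] = 0;
    }

    crash_count++;
    return is_new;

  }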
|
||||||
|
|
||||||
|
The approach is vulnerable to some path count inflation early on, but exhibits
|
||||||
|
a very strong self-limiting effect, similar to the execution path analysis
|
||||||
|
logic that is the cornerstone of afl-fuzz.
|
||||||
|
|
||||||
|
9) Investigating crashes
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
The exploitability of many types of crashes can be ambiguous; afl-fuzz tries
|
||||||
|
to address this by providing a crash exploration mode where a known-faulting
|
||||||
|
test case is fuzzed in a manner very similar to the normal operation of the
|
||||||
|
fuzzer, but with a constraint that causes any non-crashing mutations to be
|
||||||
|
thrown away.
|
||||||
|
|
||||||
|
A detailed discussion of the value of this approach can be found here:
|
||||||
|
|
||||||
|
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-crash-exploration-mode.html
|
||||||
|
|
||||||
|
The method uses instrumentation feedback to explore the state of the crashing
|
||||||
|
program to get past the ambiguous faulting condition and then isolate the
|
||||||
|
newly-found inputs for human review.
|
||||||
|
|
||||||
|
On the subject of crashes, it is worth noting that in contrast to normal
|
||||||
|
queue entries, crashing inputs are *not* trimmed; they are kept exactly as
|
||||||
|
discovered to make it easier to compare them to the parent, non-crashing entry
|
||||||
|
in the queue. That said, afl-tmin can be used to shrink them at will.
|
||||||
|
|
||||||
|
10) The fork server
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
To improve performance, afl-fuzz uses a "fork server", where the fuzzed process
|
||||||
|
goes through execve(), linking, and libc initialization only once, and is then
|
||||||
|
cloned from a stopped process image by leveraging copy-on-write. The
|
||||||
|
implementation is described in more detail here:
|
||||||
|
|
||||||
|
http://lcamtuf.blogspot.com/2014/10/fuzzing-binaries-without-execve.html
|
||||||
|
|
||||||
|
The fork server is an integral aspect of the injected instrumentation and
|
||||||
|
simply stops at the first instrumented function to await commands from
|
||||||
|
afl-fuzz.
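
The "await commands" part boils down to a small request/response loop over two
pipes. A stripped-down sketch of the server side is shown below; the
descriptor numbers and error handling are simplified for the example:

  #include <stdint.h>
  #include <sys/types.h>
  #include <sys/wait.h>
  #include <unistd.h>

  #define CTL_FD 198        /* commands from the fuzzer (placeholder) */
  #define ST_FD  199        /* status back to the fuzzer (placeholder) */

  /* Runs inside the instrumented target, before the main work starts. */
  static void fork_server_loop(void) {

    uint32_t msg = 0;

    /* Phone home; if nobody is listening, we're not under afl-fuzz. */
    if (write(ST_FD, &msg, 4) != 4) return;

    while (1) {

      pid_t    child;
      int      status;
      uint32_t child_id;

      /* Block until the fuzzer asks for another run. */
      if (read(CTL_FD, &msg, 4) != 4) _exit(1);

      child = fork();
      if (child < 0) _exit(1);

      if (!child) {
        /* Child: close the server descriptors and resume normal
           execution of the target program from this point. */
        close(CTL_FD); close(ST_FD);
        return;
      }

      /* Parent: report the child PID, wait, report the exit status. */
      child_id = (uint32_t)child;
      if (write(ST_FD, &child_id, 4) != 4) _exit(1);
      if (waitpid(child, &status, 0) < 0) _exit(1);
      if (write(ST_FD, &status, 4) != 4) _exit(1);

    }

  }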
|
||||||
|
|
||||||
|
With fast targets, the fork server can offer considerable performance gains,
|
||||||
|
usually between 1.5x and 2x. It is also possible to:
|
||||||
|
|
||||||
|
- Use the fork server in manual ("deferred") mode, skipping over larger,
|
||||||
|
user-selected chunks of initialization code. It requires very modest
|
||||||
|
    code changes to the targeted program and, with some targets, can
|
||||||
|
produce 10x+ performance gains.
|
||||||
|
|
||||||
|
- Enable "persistent" mode, where a single process is used to try out
|
||||||
|
multiple inputs, greatly limiting the overhead of repetitive fork()
|
||||||
|
calls. This generally requires some code changes to the targeted program,
|
||||||
|
but can improve the performance of fast targets by a factor of 5 or more
|
||||||
|
- approximating the benefits of in-process fuzzing jobs while still
|
||||||
|
maintaining very robust isolation between the fuzzer process and the
|
||||||
|
targeted binary.
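
For reference, a minimal persistent-mode harness typically looks like the
sketch below; it relies on the __AFL_LOOP() macro provided when building with
afl-clang-fast, and parse_input() stands in for whatever code is being tested:

  #include <stdlib.h>
  #include <unistd.h>

  /* Hypothetical parser under test. */
  extern void parse_input(const unsigned char* data, size_t len);

  int main(void) {

    static unsigned char buf[4096];

    /* __AFL_LOOP() lets one forked process handle many inputs in a row;
       the argument caps the number of iterations before the process is
       restarted to limit state accumulation. */
    while (__AFL_LOOP(1000)) {

      ssize_t len = read(0, buf, sizeof(buf));
      if (len > 0) parse_input(buf, (size_t)len);

    }

    return 0;

  }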
|
||||||
|
|
||||||
|
11) Parallelization
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The parallelization mechanism relies on periodically examining the queues
|
||||||
|
produced by independently-running instances on other CPU cores or on remote
|
||||||
|
machines, and then selectively pulling in the test cases that, when tried
|
||||||
|
out locally, produce behaviors not yet seen by the fuzzer at hand.
|
||||||
|
|
||||||
|
This allows for extreme flexibility in fuzzer setup, including running synced
|
||||||
|
instances against different parsers of a common data format, often with
|
||||||
|
synergistic effects.
|
||||||
|
|
||||||
|
For more information about this design, see parallel_fuzzing.txt.
|
||||||
|
|
||||||
|
12) Binary-only instrumentation
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
Instrumentation of black-box, binary-only targets is accomplished with the
|
||||||
|
help of a separately-built version of QEMU in "user emulation" mode. This also
|
||||||
|
allows the execution of cross-architecture code - say, ARM binaries on x86.
|
||||||
|
|
||||||
|
QEMU uses basic blocks as translation units; the instrumentation is implemented
|
||||||
|
on top of this and uses a model roughly analogous to the compile-time hooks:
|
||||||
|
|
||||||
|
if (block_address > elf_text_start && block_address < elf_text_end) {
|
||||||
|
|
||||||
|
cur_location = (block_address >> 4) ^ (block_address << 8);
|
||||||
|
shared_mem[cur_location ^ prev_location]++;
|
||||||
|
prev_location = cur_location >> 1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
The shift-and-XOR-based scrambling in the second line is used to mask the
|
||||||
|
effects of instruction alignment.
|
||||||
|
|
||||||
|
The start-up of binary translators such as QEMU, DynamoRIO, and PIN is fairly
|
||||||
|
slow; to counter this, the QEMU mode leverages a fork server similar to that
|
||||||
|
used for compiler-instrumented code, effectively spawning copies of an
|
||||||
|
already-initialized process paused at _start.
|
||||||
|
|
||||||
|
First-time translation of a new basic block also incurs substantial latency. To
|
||||||
|
eliminate this problem, the AFL fork server is extended by providing a channel
|
||||||
|
between the running emulator and the parent process. The channel is used
|
||||||
|
to notify the parent about the addresses of any newly-encountered blocks and to
|
||||||
|
add them to the translation cache that will be replicated for future child
|
||||||
|
processes.
|
||||||
|
|
||||||
|
As a result of these two optimizations, the overhead of the QEMU mode is
|
||||||
|
roughly 2-5x, compared to 100x+ for PIN.
|
||||||
|
|
||||||
|
13) The afl-analyze tool
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
The file format analyzer is a simple extension of the minimization algorithm
|
||||||
|
discussed earlier on; instead of attempting to remove no-op blocks, the tool
|
||||||
|
performs a series of walking byte flips and then annotates runs of bytes
|
||||||
|
in the input file.
|
||||||
|
|
||||||
|
It uses the following classification scheme:
|
||||||
|
|
||||||
|
- "No-op blocks" - segments where bit flips cause no apparent changes to
|
||||||
|
control flow. Common examples may be comment sections, pixel data within
|
||||||
|
a bitmap file, etc.
|
||||||
|
|
||||||
|
- "Superficial content" - segments where some, but not all, bitflips
|
||||||
|
produce some control flow changes. Examples may include strings in rich
|
||||||
|
documents (e.g., XML, RTF).
|
||||||
|
|
||||||
|
- "Critical stream" - a sequence of bytes where all bit flips alter control
|
||||||
|
flow in different but correlated ways. This may be compressed data,
|
||||||
|
non-atomically compared keywords or magic values, etc.
|
||||||
|
|
||||||
|
- "Suspected length field" - small, atomic integer that, when touched in
|
||||||
|
any way, causes a consistent change to program control flow, suggestive
|
||||||
|
of a failed length check.
|
||||||
|
|
||||||
|
- "Suspected cksum or magic int" - an integer that behaves similarly to a
|
||||||
|
length field, but has a numerical value that makes the length explanation
|
||||||
|
unlikely. This is suggestive of a checksum or other "magic" integer.
|
||||||
|
|
||||||
|
- "Suspected checksummed block" - a long block of data where any change
|
||||||
|
always triggers the same new execution path. Likely caused by failing
|
||||||
|
a checksum or a similar integrity check before any subsequent parsing
|
||||||
|
takes place.
|
||||||
|
|
||||||
|
- "Magic value section" - a generic token where changes cause the type
|
||||||
|
of binary behavior outlined earlier, but that doesn't meet any of the
|
||||||
|
other criteria. May be an atomically compared keyword or so.
|
@ -0,0 +1 @@
|
|||||||
|
() { _; } >_[$($())] { id; }
|
@ -0,0 +1 @@
|
|||||||
|
() { x() { _; }; x() { _; } <<a; }
|
@ -0,0 +1,3 @@
|
|||||||
|
<!DOCTYPEd[<!ENTITY
|
||||||
|
S ""><!ENTITY %
|
||||||
|
N "<!ELEMENT<![INCLUDE0"<!ENTITYL%N;
|
@ -0,0 +1,2 @@
|
|||||||
|
create table t0(o CHar(0)CHECK(0&O>O));insert into t0
|
||||||
|
select randomblob(0)-trim(0);
|
@ -0,0 +1 @@
|
|||||||
|
SELECT 0 UNION SELECT 0 ORDER BY 1 COLLATE"""""""";
|
@ -0,0 +1 @@
|
|||||||
|
PRAGMA foreign_keys=1;CREATE TABLE t1("""0"PRIMARY KEy REFERENCES t1 ON DELETE SET NULL);REPLACE INTO t1 SELECT(0);
|
@ -0,0 +1,2 @@
|
|||||||
|
DROP TABLE IF EXISTS t;CREATE VIRTUAL TABLE t0 USING fts4();insert into t0 select zeroblob(0);SAVEPOINT O;insert into t0
|
||||||
|
select(0);SAVEPOINT E;insert into t0 SELECT 0 UNION SELECT 0'x'ORDER BY x;
|
@ -0,0 +1 @@
|
|||||||
|
SELECT*from(select"",zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(150000000),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0)),(select"",zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),(0),zeroblob(150000000),(0),zeroblob(0),(0)EXCEPT select zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0));
|
@ -0,0 +1,2 @@
|
|||||||
|
create table t0(t);insert into t0
|
||||||
|
select strftime();
|
@ -0,0 +1 @@
|
|||||||
|
SELECT fts3_tokenizer(@0());
|
@ -0,0 +1 @@
|
|||||||
|
select''like''like''like#0;
|
@ -0,0 +1 @@
|
|||||||
|
PRAGMA e;select lower(0);select lower(0)"a",""GROUP BY a ORDER BY a;
|
@ -0,0 +1 @@
|
|||||||
|
WITH x AS(SELECT*FROM t)SELECT""EXCEPT SELECT 0 ORDER BY 0 COLLATE"";
|
@ -0,0 +1 @@
|
|||||||
|
CREATE VIRTUAL TABLE x USING fts4();VALUES(0,0),(0,0),(0,0),(0,0);PRAGMA writable_schema=ON;UPDATE sqlite_master SET sql=''WHERE name='';UPDATE sqlite_master SET sql='CREATE table t(d CHECK(T(#0)';SAVEPOINT K;SAVEPOINT T;SAVEPOINT T;ANALYZE;ROLLBACK;SAVEPOINT E;DROP TABLE IF EXISTS t;
|
@ -0,0 +1 @@
|
|||||||
|
CREATE VIRTUAL TABLE t4 USING fts4(0,b,c,notindexed=0);INSERT INTO t4 VALUES('','','0');BEGIN;INSERT INTO t4 VALUES('','','0');INSERT INTO t4(t4)VALUES('integrity-check');
|
@ -0,0 +1 @@
|
|||||||
|
DETACH(select group_concat(q));
|
@ -0,0 +1 @@
|
|||||||
|
select(select strftime());
|
@ -0,0 +1 @@
|
|||||||
|
select n()AND+#00;
|
@ -0,0 +1 @@
|
|||||||
|
select e.*,0 from(s,(L))e;
|
@ -0,0 +1 @@
|
|||||||
|
PRAGMA encoding='UTF16';CREATE VIRTUAL TABLE È USING s;
|
@ -0,0 +1 @@
|
|||||||
|
CREATE VIRTUAL TABLE t USING fts4(tokenize=);
|
@ -0,0 +1 @@
|
|||||||
|
CREATE TABLE p(a UNIQUE,PRIMARY KEY('a'))WITHOUT rowid;
|
@ -0,0 +1 @@
|
|||||||
|
CREATE TABLE t0(z);WITH d(x)AS(SELECT*UNION SELECT 0)INSERT INTO t0 SELECT 0 FROM d;
|
@ -0,0 +1 @@
|
|||||||
|
create table t0(‰ DEFAULT(0=0)NOT/**/NULL);REPLACE into t0 select'';
|
@ -0,0 +1,6 @@
|
|||||||
|
CREATE VIRTUAL TABLE t0 USING fts4(x,order=DESC);
|
||||||
|
INSERT INTO t0(docid,x)VALUES(-1E0,'0(o');
|
||||||
|
INSERT INTO t0 VALUES('');
|
||||||
|
INSERT INTO t0 VALUES('');
|
||||||
|
INSeRT INTO t0 VALUES('o');
|
||||||
|
SELECT docid FROM t0 WHERE t0 MATCH'"0*o"';
|
@ -0,0 +1 @@
|
|||||||
|
SELECT printf('%*.*f',90000||006000000&6600000000,00000000000000000909000000000000.0000000000000000)""WHERE"">"";
|
@ -0,0 +1 @@
|
|||||||
|
CREATE VIRTUAL TABLE t0 USING fts4(content=t0);
|
@ -0,0 +1 @@
|
|||||||
|
REATE VIRTUAL TABLE t0 USING fts4(prefix=0);INSERT INTO t0 VALUES(0);
|
@ -0,0 +1 @@
|
|||||||
|
create table t(s);PRAGMA writable_schema=ON;UPDATE sqlite_master SET sql='ANALYZE;CREATE VIRTUAL TABLE t USING fts3;DROP TABLE t;DROP TABLE EXISTS t';PRAGMA r;SAVEPOINT T;ANALYZE;ROLLBACK;SAVEPOINT E;DROP TABLE IF EXISTS t;
|
@ -0,0 +1,3 @@
|
|||||||
|
$$@$$$@$o
|
||||||
|
S…Ôo
|
||||||
|
S…Ô
|