diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57df30e --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +# Binaries produced by "make". +afl-analyze +afl-as +afl-clang +afl-clang++ +afl-fuzz +afl-g++ +afl-gcc +afl-gotcpu +afl-showmap +afl-tmin +as + +# Binaries produced by "make -C llvm_mode" +afl-clang-fast +afl-clang-fast++ +afl-llvm-pass.so +afl-llvm-rt-32.o +afl-llvm-rt-64.o +afl-llvm-rt.o diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..861728d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,60 @@ +language: c + +env: + - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_STOP_MANUALLY=1 + - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1 + # TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes + - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1 + +before_install: + - sudo apt update + - sudo apt install -y libtool libtool-bin automake bison libglib2.0 + +# TODO: Look into splitting off some builds using a build matrix. +# TODO: Move this all into a bash script so we don't need to write bash in yaml. +script: + - make + - ./afl-gcc ./test-instr.c -o test-instr-gcc + - mkdir seeds + - echo "" > seeds/nil_seed + - if [ -z "$AFL_STOP_MANUALLY" ]; + then ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc; + else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc; + fi + - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3 + - rm -r out/* + - ./afl-clang ./test-instr.c -o test-instr-clang + - if [ -z "$AFL_STOP_MANUALLY" ]; + then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang; + else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang; + fi + - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 2 + - make clean + - CC=clang CXX=clang++ make + - cd llvm_mode + # TODO: Build with different versions of clang/LLVM since LLVM passes don't + # have a stable API. 
+ - CC=clang CXX=clang++ LLVM_CONFIG=llvm-config make + - cd .. + - rm -r out/* + - ./afl-clang-fast ./test-instr.c -o test-instr-clang-fast + - if [ -z "$AFL_STOP_MANUALLY" ]; + then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast; + else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast; + fi + - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3 + # Test fuzzing libFuzzer targets and trace-pc-guard instrumentation. + - clang -g -fsanitize-coverage=trace-pc-guard ./test-libfuzzer-target.c -c + - clang -c -w llvm_mode/afl-llvm-rt.o.c + - wget https://raw.githubusercontent.com/llvm/llvm-project/main/compiler-rt/lib/fuzzer/afl/afl_driver.cpp + - clang++ afl_driver.cpp afl-llvm-rt.o.o test-libfuzzer-target.o -o test-libfuzzer-target + - timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-libfuzzer-target + - cd qemu_mode + - ./build_qemu_support.sh + - cd .. + - gcc ./test-instr.c -o test-no-instr + - if [ -z "$AFL_STOP_MANUALLY" ]; + then ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr; + else timeout --preserve-status 5s ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr; + fi + - .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 12 -p 9 diff --git a/check_fuzzer_stats.sh b/.travis/check_fuzzer_stats.sh similarity index 96% rename from check_fuzzer_stats.sh rename to .travis/check_fuzzer_stats.sh index 010e31a..6fd6bd5 100644 --- a/check_fuzzer_stats.sh +++ b/.travis/check_fuzzer_stats.sh @@ -1,60 +1,60 @@ -#!/bin/bash -usage() { - echo "Usage: $0 -o -k -v [-p ]" 1>&2; - echo " " 1>&2; - echo "Checks if a key:value appears in the fuzzer_stats report" 1>&2; - echo " " 1>&2; - echo -n "If \"value\" is numeric and \"precision\" is defined, checks if the stat " 1>&2; - echo "printed by afl is value+/-precision." 
1>&2; - exit 1; } - -while getopts "o:k:v:p:" opt; do - case "${opt}" in - o) - o=${OPTARG} - ;; - k) - k=${OPTARG} - ;; - v) - v=${OPTARG} - ;; - p) - p=${OPTARG} - ;; - *) - usage - ;; - esac -done - -if [ -z $o ] || [ -z $k ] || [ -z $v ]; then usage; fi - -# xargs to trim the surrounding whitespaces -stat_v=$( grep $k "$o"/fuzzer_stats | cut -d ":" -f 2 | xargs ) -v=$( echo "$v" | xargs ) - -if [ -z stat_v ]; - then echo "ERROR: key $k not found in fuzzer_stats." 1>&2 - exit 1 -fi - -re_percent='^[0-9]+([.][0-9]+)?\%$' -# if the argument is a number in percentage, get rid of the % -if [[ "$v" =~ $re_percent ]]; then v=${v: :-1}; fi -if [[ "$stat_v" =~ $re_percent ]]; then stat_v=${stat_v: :-1}; fi - -re_numeric='^[0-9]+([.][0-9]+)?$' -# if the argument is not a number, we check for strict equality -if (! [[ "$v" =~ $re_numeric ]]) || (! [[ "$stat_v" =~ $re ]]); - then if [ "$v" != "$stat_v" ]; - then echo "ERROR: \"$k:$stat_v\" (should be $v)." 1>&2 - exit 2; - fi -# checks if the stat reported by afl is in the range -elif [ "$stat_v" -lt $(( v - p )) ] || [ "$stat_v" -gt $(( v + p )) ]; - then echo "ERROR: key $k:$stat_v is out of correct range." 1>&2 - exit 3; -fi -echo "OK: key $k:$stat_v" 1>&2 - +#!/bin/bash +usage() { + echo "Usage: $0 -o -k -v [-p ]" 1>&2; + echo " " 1>&2; + echo "Checks if a key:value appears in the fuzzer_stats report" 1>&2; + echo " " 1>&2; + echo -n "If \"value\" is numeric and \"precision\" is defined, checks if the stat " 1>&2; + echo "printed by afl is value+/-precision." 
1>&2; + exit 1; } + +while getopts "o:k:v:p:" opt; do + case "${opt}" in + o) + o=${OPTARG} + ;; + k) + k=${OPTARG} + ;; + v) + v=${OPTARG} + ;; + p) + p=${OPTARG} + ;; + *) + usage + ;; + esac +done + +if [ -z $o ] || [ -z $k ] || [ -z $v ]; then usage; fi + +# xargs to trim the surrounding whitespaces +stat_v=$( grep $k "$o"/fuzzer_stats | cut -d ":" -f 2 | xargs ) +v=$( echo "$v" | xargs ) + +if [ -z stat_v ]; + then echo "ERROR: key $k not found in fuzzer_stats." 1>&2 + exit 1 +fi + +re_percent='^[0-9]+([.][0-9]+)?\%$' +# if the argument is a number in percentage, get rid of the % +if [[ "$v" =~ $re_percent ]]; then v=${v: :-1}; fi +if [[ "$stat_v" =~ $re_percent ]]; then stat_v=${stat_v: :-1}; fi + +re_numeric='^[0-9]+([.][0-9]+)?$' +# if the argument is not a number, we check for strict equality +if (! [[ "$v" =~ $re_numeric ]]) || (! [[ "$stat_v" =~ $re ]]); + then if [ "$v" != "$stat_v" ]; + then echo "ERROR: \"$k:$stat_v\" (should be $v)." 1>&2 + exit 2; + fi +# checks if the stat reported by afl is in the range +elif [ "$stat_v" -lt $(( v - p )) ] || [ "$stat_v" -gt $(( v + p )) ]; + then echo "ERROR: key $k:$stat_v is out of correct range." 1>&2 + exit 3; +fi +echo "OK: key $k:$stat_v" 1>&2 + diff --git a/README.md b/README.md index 1f3bdfd..49960b9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,493 @@ -# 开源项目阅读:AFL +# american fuzzy lop -- 源代码地址:https://github.com/google/AFL +[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL) +Originally developed by Michal Zalewski . + +See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read +this file. + +## 1) Challenges of guided fuzzing + +Fuzzing is one of the most powerful and proven strategies for identifying +security issues in real-world software; it is responsible for the vast +majority of remote code execution and privilege escalation bugs found to date +in security-critical software. 
+ +Unfortunately, fuzzing is also relatively shallow; blind, random mutations +make it very unlikely to reach certain code paths in the tested code, leaving +some vulnerabilities firmly outside the reach of this technique. + +There have been numerous attempts to solve this problem. One of the early +approaches - pioneered by Tavis Ormandy - is corpus distillation. The method +relies on coverage signals to select a subset of interesting seeds from a +massive, high-quality corpus of candidate files, and then fuzz them by +traditional means. The approach works exceptionally well, but requires such +a corpus to be readily available. In addition, block coverage measurements +provide only a very simplistic understanding of program state, and are less +useful for guiding the fuzzing effort in the long haul. + +Other, more sophisticated research has focused on techniques such as program +flow analysis ("concolic execution"), symbolic execution, or static analysis. +All these methods are extremely promising in experimental settings, but tend +to suffer from reliability and performance problems in practical uses - and +currently do not offer a viable alternative to "dumb" fuzzing techniques. + +## 2) The afl-fuzz approach + +American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple +but rock-solid instrumentation-guided genetic algorithm. It uses a modified +form of edge coverage to effortlessly pick up subtle, local-scale changes to +program control flow. 
+ +Simplifying a bit, the overall algorithm can be summed up as: + + 1) Load user-supplied initial test cases into the queue, + + 2) Take next input file from the queue, + + 3) Attempt to trim the test case to the smallest size that doesn't alter + the measured behavior of the program, + + 4) Repeatedly mutate the file using a balanced and well-researched variety + of traditional fuzzing strategies, + + 5) If any of the generated mutations resulted in a new state transition + recorded by the instrumentation, add mutated output as a new entry in the + queue. + + 6) Go to 2. + +The discovered test cases are also periodically culled to eliminate ones that +have been obsoleted by newer, higher-coverage finds; and undergo several other +instrumentation-driven effort minimization steps. + +As a side result of the fuzzing process, the tool creates a small, +self-contained corpus of interesting test cases. These are extremely useful +for seeding other, labor- or resource-intensive testing regimes - for example, +for stress-testing browsers, office applications, graphics suites, or +closed-source tools. + +The fuzzer is thoroughly tested to deliver out-of-the-box performance far +superior to blind fuzzing or coverage-only tools. + +## 3) Instrumenting programs for use with AFL + +When source code is available, instrumentation can be injected by a companion +tool that works as a drop-in replacement for gcc or clang in any standard build +process for third-party code. + +The instrumentation has a fairly modest performance impact; in conjunction with +other optimizations implemented by afl-fuzz, most programs can be fuzzed as fast +or even faster than possible with traditional tools. 
+ +The correct way to recompile the target program may vary depending on the +specifics of the build process, but a nearly-universal approach would be: + +```shell +$ CC=/path/to/afl/afl-gcc ./configure +$ make clean all +``` + +For C++ programs, you'd also want to set `CXX=/path/to/afl/afl-g++`. + +The clang wrappers (afl-clang and afl-clang++) can be used in the same way; +clang users may also opt to leverage a higher-performance instrumentation mode, +as described in llvm_mode/README.llvm. + +When testing libraries, you need to find or write a simple program that reads +data from stdin or from a file and passes it to the tested library. In such a +case, it is essential to link this executable against a static version of the +instrumented library, or to make sure that the correct .so file is loaded at +runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static +build, usually possible via: + +```shell +$ CC=/path/to/afl/afl-gcc ./configure --disable-shared +``` + +Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to +automatically enable code hardening options that make it easier to detect +simple memory bugs. Libdislocator, a helper library included with AFL (see +libdislocator/README.dislocator) can help uncover heap corruption issues, too. + +PS. ASAN users are advised to review the [notes_for_asan.txt](docs/notes_for_asan.txt) file for important +caveats. + +## 4) Instrumenting binary-only apps + +When source code is *NOT* available, the fuzzer offers experimental support for +fast, on-the-fly instrumentation of black-box binaries. This is accomplished +with a version of QEMU running in the lesser-known "user space emulation" mode. + +QEMU is a project separate from AFL, but you can conveniently build the +feature by doing: + +```shell +$ cd qemu_mode +$ ./build_qemu_support.sh +``` + +For additional instructions and caveats, see qemu_mode/README.qemu.
+ +The mode is approximately 2-5x slower than compile-time instrumentation, is +less conducive to parallelization, and may have some other quirks. + +## 5) Choosing initial test cases + +To operate correctly, the fuzzer requires one or more starting files that +contain a good example of the input data normally expected by the targeted +application. There are two basic rules: + + - Keep the files small. Under 1 kB is ideal, although not strictly necessary. + For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt). + + - Use multiple test cases only if they are functionally different from + each other. There is no point in using fifty different vacation photos + to fuzz an image library. + +You can find many good examples of starting files in the testcases/ subdirectory +that comes with this tool. + +PS. If a large corpus of data is available for screening, you may want to use +the afl-cmin utility to identify a subset of functionally distinct files that +exercise different code paths in the target binary. + +## 6) Fuzzing binaries + +The fuzzing process itself is carried out by the afl-fuzz utility. This program +requires a read-only directory with initial test cases, a separate place to +store its findings, plus a path to the binary to test. + +For target binaries that accept input directly from stdin, the usual syntax is: + +```shell +$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...] +``` + +For programs that take input from a file, use '@@' to mark the location in +the target's command line where the input file name should be placed. The +fuzzer will substitute this for you: + +```shell +$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@ +``` + +You can also use the -f option to have the mutated data written to a specific +file. This is useful if the program expects a particular file extension or so.
+ +Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command +line) or in a traditional, blind-fuzzer mode (specify -n). + +You can use -t and -m to override the default timeout and memory limit for the +executed process; rare examples of targets that may need these settings touched +include compilers and video decoders. + +Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt). + +Note that afl-fuzz starts by performing an array of deterministic fuzzing +steps, which can take several days, but tend to produce neat test cases. If you +want quick & dirty results right away - akin to zzuf and other traditional +fuzzers - add the -d option to the command line. + +## 7) Interpreting output + +See the [status_screen.txt](docs/status_screen.txt) file for information on +how to interpret the displayed stats and monitor the health of the process. +Be sure to consult this file especially if any UI elements are highlighted in +red. + +The fuzzing process will continue until you press Ctrl-C. At minimum, you want +to allow the fuzzer to complete one queue cycle, which may take anywhere from a +couple of hours to a week or so. + +There are three subdirectories created within the output directory and updated +in real time: + + - queue/ - test cases for every distinctive execution path, plus all the + starting files given by the user. This is the synthesized corpus + mentioned in section 2. + Before using this corpus for any other purposes, you can shrink + it to a smaller size using the afl-cmin tool. The tool will find + a smaller subset of files offering equivalent edge coverage. + + - crashes/ - unique test cases that cause the tested program to receive a + fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are + grouped by the received signal. + + - hangs/ - unique test cases that cause the tested program to time out. 
The + default time limit before something is classified as a hang is + the larger of 1 second and the value of the -t parameter. + The value can be fine-tuned by setting AFL_HANG_TMOUT, but this + is rarely necessary. + +Crashes and hangs are considered "unique" if the associated execution paths +involve any state transitions not seen in previously-recorded faults. If a +single bug can be reached in multiple ways, there will be some count inflation +early in the process, but this should quickly taper off. + +The file names for crashes and hangs are correlated with parent, non-faulting +queue entries. This should help with debugging. + +When you can't reproduce a crash found by afl-fuzz, the most likely cause is +that you are not setting the same memory limit as used by the tool. Try: + +```shell +$ LIMIT_MB=50 +$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... ) +``` + +Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD, +also change -Sv to -Sd. + +Any existing output directory can also be used to resume aborted jobs; try: + +```shell +$ ./afl-fuzz -i- -o existing_output_dir [...etc...] +``` + +If you have gnuplot installed, you can also generate some pretty graphs for any +active fuzzing task using afl-plot. For an example of what this looks like, +see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/). + +## 8) Parallelized fuzzing + +Every instance of afl-fuzz takes up roughly one core. This means that on +multi-core systems, parallelization is necessary to fully utilize the hardware. +For tips on how to fuzz a common target on multiple cores or multiple networked +machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt). + +The parallel fuzzing mode also offers a simple way for interfacing AFL to other +fuzzers, to symbolic or concolic execution engines, and so forth; again, see the +last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.
+ +## 9) Fuzzer dictionaries + +By default, afl-fuzz mutation engine is optimized for compact data formats - +say, images, multimedia, compressed data, regular expression syntax, or shell +scripts. It is somewhat less suited for languages with particularly verbose and +redundant verbiage - notably including HTML, SQL, or JavaScript. + +To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to +seed the fuzzing process with an optional dictionary of language keywords, +magic headers, or other special tokens associated with the targeted data type +-- and use that to reconstruct the underlying grammar on the go: + + [http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html) + +To use this feature, you first need to create a dictionary in one of the two +formats discussed in dictionaries/README.dictionaries; and then point the fuzzer +to it via the -x option in the command line. + +(Several common dictionaries are already provided in that subdirectory, too.) + +There is no way to provide more structured descriptions of the underlying +syntax, but the fuzzer will likely figure out some of this based on the +instrumentation feedback alone. This actually works in practice, say: + + [http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html) + +PS. Even when no explicit dictionary is given, afl-fuzz will try to extract +existing syntax tokens in the input corpus by watching the instrumentation +very closely during deterministic byte flips. This works for some types of +parsers and grammars, but isn't nearly as good as the -x mode. + +If a dictionary is really hard to come by, another option is to let AFL run +for a while, and then use the token capture library that comes as a companion +utility with AFL. For that, see libtokencap/README.tokencap. 
+ +## 10) Crash triage + +The coverage-based grouping of crashes usually produces a small data set that +can be quickly triaged manually or with a very simple GDB or Valgrind script. +Every crash is also traceable to its parent non-crashing test case in the +queue, making it easier to diagnose faults. + +Having said that, it's important to acknowledge that some fuzzing crashes can be +difficult to quickly evaluate for exploitability without a lot of debugging and +code analysis work. To assist with this task, afl-fuzz supports a very unique +"crash exploration" mode enabled with the -C flag. + +In this mode, the fuzzer takes one or more crashing test cases as the input, +and uses its feedback-driven fuzzing strategies to very quickly enumerate all +code paths that can be reached in the program while keeping it in the +crashing state. + +Mutations that do not result in a crash are rejected; so are any changes that +do not affect the execution path. + +The output is a small corpus of files that can be very rapidly examined to see +what degree of control the attacker has over the faulting address, or whether +it is possible to get past an initial out-of-bounds read - and see what lies +beneath. + +Oh, one more thing: for test case minimization, give afl-tmin a try. The tool +can be operated in a very simple way: + +```shell +$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...] +``` + +The tool works with crashing and non-crashing test cases alike. In the crash +mode, it will happily accept instrumented and non-instrumented binaries. In the +non-crashing mode, the minimizer relies on standard AFL instrumentation to make +the file simpler without altering the execution path. + +The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with +afl-fuzz. + +Another recent addition to AFL is the afl-analyze tool. It takes an input +file, attempts to sequentially flip bytes, and observes the behavior of the +tested program. 
It then color-codes the input based on which sections appear to +be critical, and which are not; while not bulletproof, it can often offer quick +insights into complex file formats. More info about its operation can be found +near the end of [technical_details.txt](docs/technical_details.txt). + +## 11) Going beyond crashes + +Fuzzing is a wonderful and underutilized technique for discovering non-crashing +design and implementation errors, too. Quite a few interesting bugs have been +found by modifying the target programs to call abort() when, say: + + - Two bignum libraries produce different outputs when given the same + fuzzer-generated input, + + - An image library produces different outputs when asked to decode the same + input image several times in a row, + + - A serialization / deserialization library fails to produce stable outputs + when iteratively serializing and deserializing fuzzer-supplied data, + + - A compression library produces an output inconsistent with the input file + when asked to compress and then decompress a particular blob. + +Implementing these or similar sanity checks usually takes very little time; +if you are the maintainer of a particular package, you can make this code +conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also +shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL). + +## 12) Common-sense risks + +Please keep in mind that, similarly to many other computationally-intensive +tasks, fuzzing may put strain on your hardware and on the OS. In particular: + + - Your CPU will run hot and will need adequate cooling. In most cases, if + cooling is insufficient or stops working properly, CPU speeds will be + automatically throttled. That said, especially when fuzzing on less + suitable hardware (laptops, smartphones, etc), it's not entirely impossible + for something to blow up. 
+ + - Targeted programs may end up erratically grabbing gigabytes of memory or + filling up disk space with junk files. AFL tries to enforce basic memory + limits, but can't prevent each and every possible mishap. The bottom line + is that you shouldn't be fuzzing on systems where the prospect of data loss + is not an acceptable risk. + + - Fuzzing involves billions of reads and writes to the filesystem. On modern + systems, this will be usually heavily cached, resulting in fairly modest + "physical" I/O - but there are many factors that may alter this equation. + It is your responsibility to monitor for potential trouble; with very heavy + I/O, the lifespan of many HDDs and SSDs may be reduced. + + A good way to monitor disk I/O on Linux is the 'iostat' command: + +```shell + $ iostat -d 3 -x -k [...optional disk ID...] +``` + +## 13) Known limitations & areas for improvement + +Here are some of the most important caveats for AFL: + + - AFL detects faults by checking for the first spawned process dying due to + a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for + these signals may need to have the relevant code commented out. In the same + vein, faults in child processes spawned by the fuzzed target may evade + detection unless you manually add some code to catch that. + + - As with any other brute-force tool, the fuzzer offers limited coverage if + encryption, checksums, cryptographic signatures, or compression are used to + wholly wrap the actual data format to be tested. + + To work around this, you can comment out the relevant checks (see + experimental/libpng_no_checksum/ for inspiration); if this is not possible, + you can also write a postprocessor, as explained in + experimental/post_library/. + + - There are some unfortunate trade-offs with ASAN and 64-bit binaries. This + isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt) + for tips.
+ + - There is no direct support for fuzzing network services, background + daemons, or interactive apps that require UI interaction to work. You may + need to make simple code changes to make them behave in a more traditional + way. Preeny may offer a relatively simple option, too - see: + https://github.com/zardus/preeny + + Some useful tips for modifying network-based services can also be found at: + https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop + + - AFL doesn't output human-readable coverage data. If you want to monitor + coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov + + - Occasionally, sentient machines rise against their creators. If this + happens to you, please consult http://lcamtuf.coredump.cx/prep/. + +Beyond this, see INSTALL for platform-specific tips. + +## 14) Special thanks + +Many of the improvements to afl-fuzz wouldn't be possible without feedback, +bug reports, or patches from: + +``` + Jann Horn Hanno Boeck + Felix Groebert Jakub Wilk + Richard W. M. Jones Alexander Cherepanov + Tom Ritter Hovik Manucharyan + Sebastian Roschke Eberhard Mattes + Padraig Brady Ben Laurie + @dronesec Luca Barbato + Tobias Ospelt Thomas Jarosch + Martin Carpenter Mudge Zatko + Joe Zbiciak Ryan Govostes + Michael Rash William Robinet + Jonathan Gray Filipe Cabecinhas + Nico Weber Jodie Cunningham + Andrew Griffiths Parker Thompson + Jonathan Neuschäfer Tyler Nighswander + Ben Nagy Samir Aguiar + Aidan Thornton Aleksandar Nikolich + Sam Hakim Laszlo Szekeres + David A.
Wheeler Turo Lamminen + Andreas Stieger Richard Godbee + Louis Dassy teor2345 + Alex Moneger Dmitry Vyukov + Keegan McAllister Kostya Serebryany + Richo Healey Martijn Bogaard + rc0r Jonathan Foote + Christian Holler Dominique Pelle + Jacek Wielemborek Leo Barnes + Jeremy Barnes Jeff Trull + Guillaume Endignoux ilovezfs + Daniel Godas-Lopez Franjo Ivancic + Austin Seipp Daniel Komaromy + Daniel Binderman Jonathan Metzman + Vegard Nossum Jan Kneschke + Kurt Roeckx Marcel Bohme + Van-Thuan Pham Abhik Roychoudhury + Joshua J. Drake Toby Hutton + Rene Freingruber Sergey Davidoff + Sami Liedes Craig Young + Andrzej Jackowski Daniel Hodson +``` + +Thank you! + +## 15) Contact + +Questions? Concerns? Bug reports? Please use GitHub. + +There is also a mailing list for the project; to join, send a mail to +. Or, if you prefer to browse +archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users). diff --git a/README1.md b/README1.md deleted file mode 100644 index 6e34d4e..0000000 --- a/README1.md +++ /dev/null @@ -1,493 +0,0 @@ -# american fuzzy lop - -[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL) - -Originally developed by Michal Zalewski . - -See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read -this file. - -## 1) Challenges of guided fuzzing - -Fuzzing is one of the most powerful and proven strategies for identifying -security issues in real-world software; it is responsible for the vast -majority of remote code execution and privilege escalation bugs found to date -in security-critical software. - -Unfortunately, fuzzing is also relatively shallow; blind, random mutations -make it very unlikely to reach certain code paths in the tested code, leaving -some vulnerabilities firmly outside the reach of this technique. - -There have been numerous attempts to solve this problem. 
One of the early -approaches - pioneered by Tavis Ormandy - is corpus distillation. The method -relies on coverage signals to select a subset of interesting seeds from a -massive, high-quality corpus of candidate files, and then fuzz them by -traditional means. The approach works exceptionally well, but requires such -a corpus to be readily available. In addition, block coverage measurements -provide only a very simplistic understanding of program state, and are less -useful for guiding the fuzzing effort in the long haul. - -Other, more sophisticated research has focused on techniques such as program -flow analysis ("concolic execution"), symbolic execution, or static analysis. -All these methods are extremely promising in experimental settings, but tend -to suffer from reliability and performance problems in practical uses - and -currently do not offer a viable alternative to "dumb" fuzzing techniques. - -## 2) The afl-fuzz approach - -American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple -but rock-solid instrumentation-guided genetic algorithm. It uses a modified -form of edge coverage to effortlessly pick up subtle, local-scale changes to -program control flow. - -Simplifying a bit, the overall algorithm can be summed up as: - - 1) Load user-supplied initial test cases into the queue, - - 2) Take next input file from the queue, - - 3) Attempt to trim the test case to the smallest size that doesn't alter - the measured behavior of the program, - - 4) Repeatedly mutate the file using a balanced and well-researched variety - of traditional fuzzing strategies, - - 5) If any of the generated mutations resulted in a new state transition - recorded by the instrumentation, add mutated output as a new entry in the - queue. - - 6) Go to 2. - -The discovered test cases are also periodically culled to eliminate ones that -have been obsoleted by newer, higher-coverage finds; and undergo several other -instrumentation-driven effort minimization steps. 
- -As a side result of the fuzzing process, the tool creates a small, -self-contained corpus of interesting test cases. These are extremely useful -for seeding other, labor- or resource-intensive testing regimes - for example, -for stress-testing browsers, office applications, graphics suites, or -closed-source tools. - -The fuzzer is thoroughly tested to deliver out-of-the-box performance far -superior to blind fuzzing or coverage-only tools. - -## 3) Instrumenting programs for use with AFL - -When source code is available, instrumentation can be injected by a companion -tool that works as a drop-in replacement for gcc or clang in any standard build -process for third-party code. - -The instrumentation has a fairly modest performance impact; in conjunction with -other optimizations implemented by afl-fuzz, most programs can be fuzzed as fast -or even faster than possible with traditional tools. - -The correct way to recompile the target program may vary depending on the -specifics of the build process, but a nearly-universal approach would be: - -```shell -$ CC=/path/to/afl/afl-gcc ./configure -$ make clean all -``` - -For C++ programs, you'd would also want to set `CXX=/path/to/afl/afl-g++`. - -The clang wrappers (afl-clang and afl-clang++) can be used in the same way; -clang users may also opt to leverage a higher-performance instrumentation mode, -as described in llvm_mode/README.llvm. - -When testing libraries, you need to find or write a simple program that reads -data from stdin or from a file and passes it to the tested library. In such a -case, it is essential to link this executable against a static version of the -instrumented library, or to make sure that the correct .so file is loaded at -runtime (usually by setting `LD_LIBRARY_PATH`). 
The simplest option is a static -build, usually possible via: - -```shell -$ CC=/path/to/afl/afl-gcc ./configure --disable-shared -``` - -Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to -automatically enable code hardening options that make it easier to detect -simple memory bugs. Libdislocator, a helper library included with AFL (see -libdislocator/README.dislocator), can help uncover heap corruption issues, too. - -PS. ASAN users are advised to review the [notes_for_asan.txt](docs/notes_for_asan.txt) file for important -caveats. - -## 4) Instrumenting binary-only apps - -When source code is *NOT* available, the fuzzer offers experimental support for -fast, on-the-fly instrumentation of black-box binaries. This is accomplished -with a version of QEMU running in the lesser-known "user space emulation" mode. - -QEMU is a project separate from AFL, but you can conveniently build the -feature by doing: - -```shell -$ cd qemu_mode -$ ./build_qemu_support.sh -``` - -For additional instructions and caveats, see qemu_mode/README.qemu. - -The mode is approximately 2-5x slower than compile-time instrumentation, is -less conducive to parallelization, and may have some other quirks. - -## 5) Choosing initial test cases - -To operate correctly, the fuzzer requires one or more starting files that -contain a good example of the input data normally expected by the targeted -application. There are two basic rules: - - - Keep the files small. Under 1 kB is ideal, although not strictly necessary. - For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt). - - - Use multiple test cases only if they are functionally different from - each other. There is no point in using fifty different vacation photos - to fuzz an image library. - -You can find many good examples of starting files in the testcases/ subdirectory -that comes with this tool. - -PS.
If a large corpus of data is available for screening, you may want to use -the afl-cmin utility to identify a subset of functionally distinct files that -exercise different code paths in the target binary. - -## 6) Fuzzing binaries - -The fuzzing process itself is carried out by the afl-fuzz utility. This program -requires a read-only directory with initial test cases, a separate place to -store its findings, plus a path to the binary to test. - -For target binaries that accept input directly from stdin, the usual syntax is: - -```shell -$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...] -``` - -For programs that take input from a file, use '@@' to mark the location in -the target's command line where the input file name should be placed. The -fuzzer will substitute this for you: - -```shell -$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@ -``` - -You can also use the -f option to have the mutated data written to a specific -file. This is useful if the program expects a particular file extension or so. - -Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command -line) or in a traditional, blind-fuzzer mode (specify -n). - -You can use -t and -m to override the default timeout and memory limit for the -executed process; rare examples of targets that may need these settings touched -include compilers and video decoders. - -Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt). - -Note that afl-fuzz starts by performing an array of deterministic fuzzing -steps, which can take several days, but tend to produce neat test cases. If you -want quick & dirty results right away - akin to zzuf and other traditional -fuzzers - add the -d option to the command line. - -## 7) Interpreting output - -See the [status_screen.txt](docs/status_screen.txt) file for information on -how to interpret the displayed stats and monitor the health of the process. 
-Be sure to consult this file especially if any UI elements are highlighted in -red. - -The fuzzing process will continue until you press Ctrl-C. At minimum, you want -to allow the fuzzer to complete one queue cycle, which may take anywhere from a -couple of hours to a week or so. - -There are three subdirectories created within the output directory and updated -in real time: - - - queue/ - test cases for every distinctive execution path, plus all the - starting files given by the user. This is the synthesized corpus - mentioned in section 2. - Before using this corpus for any other purposes, you can shrink - it to a smaller size using the afl-cmin tool. The tool will find - a smaller subset of files offering equivalent edge coverage. - - - crashes/ - unique test cases that cause the tested program to receive a - fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are - grouped by the received signal. - - - hangs/ - unique test cases that cause the tested program to time out. The - default time limit before something is classified as a hang is - the larger of 1 second and the value of the -t parameter. - The value can be fine-tuned by setting AFL_HANG_TMOUT, but this - is rarely necessary. - -Crashes and hangs are considered "unique" if the associated execution paths -involve any state transitions not seen in previously-recorded faults. If a -single bug can be reached in multiple ways, there will be some count inflation -early in the process, but this should quickly taper off. - -The file names for crashes and hangs are correlated with parent, non-faulting -queue entries. This should help with debugging. - -When you can't reproduce a crash found by afl-fuzz, the most likely cause is -that you are not setting the same memory limit as used by the tool. Try: - -```shell -$ LIMIT_MB=50 -$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... ) -``` - -Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD, -also change -Sv to -Sd. 
- -Any existing output directory can also be used to resume aborted jobs; try: - -```shell -$ ./afl-fuzz -i- -o existing_output_dir [...etc...] -``` - -If you have gnuplot installed, you can also generate some pretty graphs for any -active fuzzing task using afl-plot. For an example of what this looks like, -see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/). - -## 8) Parallelized fuzzing - -Every instance of afl-fuzz takes up roughly one core. This means that on -multi-core systems, parallelization is necessary to fully utilize the hardware. -For tips on how to fuzz a common target on multiple cores or multiple networked -machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt). - -The parallel fuzzing mode also offers a simple way for interfacing AFL to other -fuzzers, to symbolic or concolic execution engines, and so forth; again, see the -last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips. - -## 9) Fuzzer dictionaries - -By default, the afl-fuzz mutation engine is optimized for compact data formats - -say, images, multimedia, compressed data, regular expression syntax, or shell -scripts. It is somewhat less suited for languages with particularly verbose and -redundant verbiage - notably including HTML, SQL, or JavaScript. - -To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to -seed the fuzzing process with an optional dictionary of language keywords, -magic headers, or other special tokens associated with the targeted data type --- and use that to reconstruct the underlying grammar on the go: - - [http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html) - -To use this feature, you first need to create a dictionary in one of the two -formats discussed in dictionaries/README.dictionaries; and then point the fuzzer -to it via the -x option in the command line.
- -(Several common dictionaries are already provided in that subdirectory, too.) - -There is no way to provide more structured descriptions of the underlying -syntax, but the fuzzer will likely figure out some of this based on the -instrumentation feedback alone. This actually works in practice, say: - - [http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html) - -PS. Even when no explicit dictionary is given, afl-fuzz will try to extract -existing syntax tokens in the input corpus by watching the instrumentation -very closely during deterministic byte flips. This works for some types of -parsers and grammars, but isn't nearly as good as the -x mode. - -If a dictionary is really hard to come by, another option is to let AFL run -for a while, and then use the token capture library that comes as a companion -utility with AFL. For that, see libtokencap/README.tokencap. - -## 10) Crash triage - -The coverage-based grouping of crashes usually produces a small data set that -can be quickly triaged manually or with a very simple GDB or Valgrind script. -Every crash is also traceable to its parent non-crashing test case in the -queue, making it easier to diagnose faults. - -Having said that, it's important to acknowledge that some fuzzing crashes can be -difficult to quickly evaluate for exploitability without a lot of debugging and -code analysis work. To assist with this task, afl-fuzz supports a very unique -"crash exploration" mode enabled with the -C flag. - -In this mode, the fuzzer takes one or more crashing test cases as the input, -and uses its feedback-driven fuzzing strategies to very quickly enumerate all -code paths that can be reached in the program while keeping it in the -crashing state. - -Mutations that do not result in a crash are rejected; so are any changes that -do not affect the execution path. 
- -The output is a small corpus of files that can be very rapidly examined to see -what degree of control the attacker has over the faulting address, or whether -it is possible to get past an initial out-of-bounds read - and see what lies -beneath. - -Oh, one more thing: for test case minimization, give afl-tmin a try. The tool -can be operated in a very simple way: - -```shell -$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...] -``` - -The tool works with crashing and non-crashing test cases alike. In the crash -mode, it will happily accept instrumented and non-instrumented binaries. In the -non-crashing mode, the minimizer relies on standard AFL instrumentation to make -the file simpler without altering the execution path. - -The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with -afl-fuzz. - -Another recent addition to AFL is the afl-analyze tool. It takes an input -file, attempts to sequentially flip bytes, and observes the behavior of the -tested program. It then color-codes the input based on which sections appear to -be critical, and which are not; while not bulletproof, it can often offer quick -insights into complex file formats. More info about its operation can be found -near the end of [technical_details.txt](docs/technical_details.txt). - -## 11) Going beyond crashes - -Fuzzing is a wonderful and underutilized technique for discovering non-crashing -design and implementation errors, too. 
Quite a few interesting bugs have been -found by modifying the target programs to call abort() when, say: - - - Two bignum libraries produce different outputs when given the same - fuzzer-generated input, - - - An image library produces different outputs when asked to decode the same - input image several times in a row, - - - A serialization / deserialization library fails to produce stable outputs - when iteratively serializing and deserializing fuzzer-supplied data, - - - A compression library produces an output inconsistent with the input file - when asked to compress and then decompress a particular blob. - -Implementing these or similar sanity checks usually takes very little time; -if you are the maintainer of a particular package, you can make this code -conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also -shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL). - -## 12) Common-sense risks - -Please keep in mind that, similarly to many other computationally-intensive -tasks, fuzzing may put strain on your hardware and on the OS. In particular: - - - Your CPU will run hot and will need adequate cooling. In most cases, if - cooling is insufficient or stops working properly, CPU speeds will be - automatically throttled. That said, especially when fuzzing on less - suitable hardware (laptops, smartphones, etc), it's not entirely impossible - for something to blow up. - - - Targeted programs may end up erratically grabbing gigabytes of memory or - filling up disk space with junk files. AFL tries to enforce basic memory - limits, but can't prevent each and every possible mishap. The bottom line - is that you shouldn't be fuzzing on systems where the prospect of data loss - is not an acceptable risk. - - - Fuzzing involves billions of reads and writes to the filesystem. 
On modern - systems, this will usually be heavily cached, resulting in fairly modest - "physical" I/O - but there are many factors that may alter this equation. - It is your responsibility to monitor for potential trouble; with very heavy - I/O, the lifespan of many HDDs and SSDs may be reduced. - - A good way to monitor disk I/O on Linux is the 'iostat' command: - -```shell - $ iostat -d 3 -x -k [...optional disk ID...] -``` - -## 13) Known limitations & areas for improvement - -Here are some of the most important caveats for AFL: - - - AFL detects faults by checking for the first spawned process dying due to - a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for - these signals may need to have the relevant code commented out. In the same - vein, faults in child processes spawned by the fuzzed target may evade - detection unless you manually add some code to catch that. - - - As with any other brute-force tool, the fuzzer offers limited coverage if - encryption, checksums, cryptographic signatures, or compression are used to - wholly wrap the actual data format to be tested. - - To work around this, you can comment out the relevant checks (see - experimental/libpng_no_checksum/ for inspiration); if this is not possible, - you can also write a postprocessor, as explained in - experimental/post_library/. - - - There are some unfortunate trade-offs with ASAN and 64-bit binaries. This - isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt) - for tips. - - - There is no direct support for fuzzing network services, background - daemons, or interactive apps that require UI interaction to work. You may - need to make simple code changes to make them behave in a more traditional - way.
Preeny may offer a relatively simple option, too - see: - https://github.com/zardus/preeny - - Some useful tips for modifying network-based services can also be found at: - https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop - - - AFL doesn't output human-readable coverage data. If you want to monitor - coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov - - - Occasionally, sentient machines rise against their creators. If this - happens to you, please consult http://lcamtuf.coredump.cx/prep/. - -Beyond this, see INSTALL for platform-specific tips. - -## 14) Special thanks - -Many of the improvements to afl-fuzz wouldn't be possible without feedback, -bug reports, or patches from: - -``` - Jann Horn Hanno Boeck - Felix Groebert Jakub Wilk - Richard W. M. Jones Alexander Cherepanov - Tom Ritter Hovik Manucharyan - Sebastian Roschke Eberhard Mattes - Padraig Brady Ben Laurie - @dronesec Luca Barbato - Tobias Ospelt Thomas Jarosch - Martin Carpenter Mudge Zatko - Joe Zbiciak Ryan Govostes - Michael Rash William Robinet - Jonathan Gray Filipe Cabecinhas - Nico Weber Jodie Cunningham - Andrew Griffiths Parker Thompson - Jonathan Neuschäfer Tyler Nighswander - Ben Nagy Samir Aguiar - Aidan Thornton Aleksandar Nikolich - Sam Hakim Laszlo Szekeres - David A. Wheeler Turo Lamminen - Andreas Stieger Richard Godbee - Louis Dassy teor2345 - Alex Moneger Dmitry Vyukov - Keegan McAllister Kostya Serebryany - Richo Healey Martijn Bogaard - rc0r Jonathan Foote - Christian Holler Dominique Pelle - Jacek Wielemborek Leo Barnes - Jeremy Barnes Jeff Trull - Guillaume Endignoux ilovezfs - Daniel Godas-Lopez Franjo Ivancic - Austin Seipp Daniel Komaromy - Daniel Binderman Jonathan Metzman - Vegard Nossum Jan Kneschke - Kurt Roeckx Marcel Bohme - Van-Thuan Pham Abhik Roychoudhury - Joshua J. Drake Toby Hutton - Rene Freingruber Sergey Davidoff - Sami Liedes Craig Young - Andrzej Jackowski Daniel Hodson -``` - -Thank you! - -## 15) Contact - -Questions?
Concerns? Bug reports? Please use GitHub. - -There is also a mailing list for the project; to join, send a mail to -<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse -archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users). diff --git a/as b/as deleted file mode 100644 index b81beac..0000000 --- a/as +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/sh -# -# american fuzzy lop - clang assembly normalizer -# ---------------------------------------------- -# -# Written and maintained by Michal Zalewski -# The idea for this wrapper comes from Ryan Govostes. -# -# Copyright 2013, 2014 Google LLC All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# This 'as' wrapper should allow you to instrument unruly, hand-written -# assembly with afl-as. -# -# Usage: -# -# export AFL_REAL_PATH=/path/to/directory/with/afl-as/ -# AFL_PATH=/path/to/this/directory/ make clean all - -if [ "$#" -lt "2" ]; then - echo "[-] Error: this utility can't be called directly." 1>&2 - exit 1 -fi - -if [ "$AFL_REAL_PATH" = "" ]; then - echo "[-] Error: AFL_REAL_PATH not set!" 1>&2 - exit 1 -fi - -if [ ! -x "$AFL_REAL_PATH/afl-as" ]; then - echo "[-] Error: AFL_REAL_PATH does not contain the 'afl-as' binary." 1>&2 - exit 1 -fi - -unset __AFL_AS_CMDLINE __AFL_FNAME - -while [ ! "$#" = "0" ]; do - - if [ "$#" = "1" ]; then - __AFL_FNAME="$1" - else - __AFL_AS_CMDLINE="${__AFL_AS_CMDLINE} $1" - fi - - shift - -done - -test "$TMPDIR" = "" && TMPDIR=/tmp - -TMPFILE=`mktemp $TMPDIR/.afl-XXXXXXXXXX.s` - -test "$TMPFILE" = "" && exit 1 - -clang -cc1as -filetype asm -output-asm-variant 0 "${__AFL_FNAME}" >"$TMPFILE" - -ERR="$?" - -if [ ! "$ERR" = "0" ]; then - rm -f "$TMPFILE" - exit $ERR -fi - -"$AFL_REAL_PATH/afl-as" ${__AFL_AS_CMDLINE} "$TMPFILE" - -ERR="$?"
- -rm -f "$TMPFILE" - -exit "$ERR" diff --git a/dictionaries/README.dictionaries b/dictionaries/README.dictionaries new file mode 100644 index 0000000..ea31973 --- /dev/null +++ b/dictionaries/README.dictionaries @@ -0,0 +1,43 @@ +================ +AFL dictionaries +================ + + (See ../docs/README for the general instruction manual.) + +This subdirectory contains a set of dictionaries that can be used in +conjunction with the -x option to allow the fuzzer to effortlessly explore the +grammar of some of the more verbose data formats or languages. The basic +principle behind the operation of fuzzer dictionaries is outlined in section 9 +of the "main" README for the project. + +Custom dictionaries can be added at will. They should consist of a +reasonably-sized set of rudimentary syntax units that the fuzzer will then try +to cobble together in various ways. Snippets between 2 and 16 bytes are usually +the sweet spot. + +Custom dictionaries can be created in two ways: + + - By creating a new directory and placing each token in a separate file, in + which case, there is no need to escape or otherwise format the data. + + - By creating a flat text file where tokens are listed one per line in the + format of name="value". The alphanumeric name is ignored and can be omitted, + although it is a convenient way to document the meaning of a particular + token. The value must appear in quotes, with hex escaping (\xNN) applied to + all non-printable, high-bit, or otherwise problematic characters (\\ and \" + shorthands are recognized, too). + +The fuzzer auto-selects the appropriate mode depending on whether the -x +parameter is a file or a directory. + +In the file mode, every name field can be optionally followed by @<num>, e.g.: + + keyword_foo@1 = "foo" + +Such entries will be loaded only if the requested dictionary level is equal to or +higher than this number.
The default level is zero; a higher value can be set +by appending @<num> to the dictionary file name, like so: + + -x path/to/dictionary.dct@2 + +Good examples of dictionaries can be found in xml.dict and png.dict. diff --git a/dictionaries/gif.dict b/dictionaries/gif.dict new file mode 100644 index 0000000..7114893 --- /dev/null +++ b/dictionaries/gif.dict @@ -0,0 +1,18 @@ +# +# AFL dictionary for GIF images +# ----------------------------- +# +# Created by Michal Zalewski +# + +header_87a="87a" +header_89a="89a" +header_gif="GIF" + +marker_2c="," +marker_3b=";" + +section_2101="!\x01\x12" +section_21f9="!\xf9\x04" +section_21fe="!\xfe" +section_21ff="!\xff\x11" diff --git a/dictionaries/html_tags.dict b/dictionaries/html_tags.dict new file mode 100644 index 0000000..ba946df --- /dev/null +++ b/dictionaries/html_tags.dict @@ -0,0 +1,160 @@ +# +# AFL dictionary for HTML parsers (tags only) +# ------------------------------------------- +# +# A basic collection of HTML tags likely to matter to HTML parsers. Does *not* +# include any attributes or attribute values. +# +# Created by Michal Zalewski +# + +tag_a="" +tag_abbr="" +tag_acronym="" +tag_address="
" +tag_annotation_xml="" +tag_applet="" +tag_area="" +tag_article="
" +tag_aside="