Add source code and README for AFL code reading project

main
Satori5ama 1 month ago
parent 418c4217c1
commit 6195773515

@ -1,2 +1,75 @@
# google_AFL # AFL 源码阅读项目
## 项目介绍
本项目旨在深入研究和分析 American Fuzzy Lop (AFL) 的源代码。AFL 是一个强大的模糊测试工具,广泛用于发现软件中的安全漏洞。通过对 AFL 源代码的阅读和分析,我们希望能够更好地理解其工作原理、设计理念以及如何有效地使用该工具进行安全测试。
## 目标
- 理解 AFL 的核心算法和实现细节
- 分析 AFL 在模糊测试中的应用
- 探讨 AFL 的性能优化和扩展性
- 编写项目报告,总结我们的发现和学习
## 目录结构
```
project-root/
├── src/ # 存放 AFL 源代码的目录
├── doc/ # 存放项目文档和报告的目录
└── README.md # 项目说明文件
```
## 环境准备
在开始阅读和分析 AFL 源代码之前,请确保你的开发环境中已安装以下工具:
- Git
- C 编译器(如 GCC)
- Make 工具
- 其他依赖项(请参考 AFL 文档)
## 获取源代码
要获取 AFL 的源代码,请使用以下命令:
```bash
git clone https://github.com/google/AFL.git
```
## 编译和安装 AFL
在 `src` 目录下,运行以下命令以编译 AFL(如需安装,可在编译后再执行 `make install`):
```bash
cd src
make
```
编译完成后,AFL 的可执行文件将位于 `src` 目录中。
## 阅读分析
- 阅读 AFL 源代码,关注以下关键模块:
- **主循环**: 了解 AFL 如何管理模糊测试的整个流程。
- **输入变异**: 分析 AFL 如何生成和变异测试用例。
- **覆盖率跟踪**: 理解 AFL 如何收集代码覆盖率信息,以优化测试过程。
- 记录你的发现和疑问,准备在团队讨论中分享。
## 项目报告
在完成代码阅读和分析后,请编写项目报告,文件名为 `AFL_阅读报告.docx`,并将其放置在 `doc` 目录中。报告应包含以下内容:
- 项目背景和目的
- 关键模块的分析
- 发现的关键问题和解决方案
- 总结和未来工作
## 贡献
欢迎任何对本项目感兴趣的贡献者参与!如果你有建议或发现了问题,请通过提交 issue 或 pull request 的方式与我们联系。
## 许可证
本项目遵循 MIT 许可证。有关详细信息,请参阅 LICENSE 文件。

20
src/.gitignore vendored

@ -0,0 +1,20 @@
# Binaries produced by "make".
afl-analyze
afl-as
afl-clang
afl-clang++
afl-fuzz
afl-g++
afl-gcc
afl-gotcpu
afl-showmap
afl-tmin
as
# Binaries produced by "make -C llvm_mode"
afl-clang-fast
afl-clang-fast++
afl-llvm-pass.so
afl-llvm-rt-32.o
afl-llvm-rt-64.o
afl-llvm-rt.o

@ -0,0 +1,60 @@
# CI configuration: builds AFL with several compilers and runs a short fuzzing
# smoke test against each instrumentation mode.
language: c
# Environment variants. AFL_STOP_MANUALLY is tested by the script steps below
# to decide whether afl-fuzz must be stopped with `timeout`.
env:
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_STOP_MANUALLY=1
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1
# TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1
# Install the extra packages before building (presumably needed by the
# qemu_mode build further down - confirm).
before_install:
- sudo apt update
- sudo apt install -y libtool libtool-bin automake bison libglib2.0
# TODO: Look into splitting off some builds using a build matrix.
# TODO: Move this all into a bash script so we don't need to write bash in yaml.
script:
# Default build, then fuzz a target instrumented through afl-gcc.
- make
- ./afl-gcc ./test-instr.c -o test-instr-gcc
# Seed corpus: a single file containing only a newline.
- mkdir seeds
- echo "" > seeds/nil_seed
# When AFL_STOP_MANUALLY is set, afl-fuzz is run under a 5 second timeout;
# otherwise it is run without one.
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
fi
# Check that peak_rss_mb in out/fuzzer_stats is 1 +/- 3.
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
- rm -r out/*
# Same smoke test with a target instrumented through afl-clang.
- ./afl-clang ./test-instr.c -o test-instr-clang
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 2
# Rebuild everything with clang, then build llvm_mode.
- make clean
- CC=clang CXX=clang++ make
- cd llvm_mode
# TODO: Build with different versions of clang/LLVM since LLVM passes don't
# have a stable API.
- CC=clang CXX=clang++ LLVM_CONFIG=llvm-config make
- cd ..
- rm -r out/*
# Smoke test with a target instrumented through afl-clang-fast.
- ./afl-clang-fast ./test-instr.c -o test-instr-clang-fast
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
# Test fuzzing libFuzzer targets and trace-pc-guard instrumentation.
- clang -g -fsanitize-coverage=trace-pc-guard ./test-libfuzzer-target.c -c
# Compiling afl-llvm-rt.o.c with -c yields afl-llvm-rt.o.o, which is linked below.
- clang -c -w llvm_mode/afl-llvm-rt.o.c
- wget https://raw.githubusercontent.com/llvm/llvm-project/main/compiler-rt/lib/fuzzer/afl/afl_driver.cpp
- clang++ afl_driver.cpp afl-llvm-rt.o.o test-libfuzzer-target.o -o test-libfuzzer-target
- timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-libfuzzer-target
# Build QEMU support and fuzz an uninstrumented binary with the -Q flag.
- cd qemu_mode
- ./build_qemu_support.sh
- cd ..
- gcc ./test-instr.c -o test-no-instr
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
else timeout --preserve-status 5s ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
fi
# This run is checked against a different range: 12 +/- 9.
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 12 -p 9

@ -0,0 +1,60 @@
#!/bin/bash
#######################################
# Print the command-line help and abort the script.
# Arguments: none (reads $0 for the program name)
# Outputs:   help text on stderr
# Returns:   never returns; exits with status 1
#######################################
usage() {
  {
    printf '%s\n' "Usage: $0 -o <out_dir> -k <key> -v <value> [-p <precision>]"
    printf '%s\n' " "
    printf '%s\n' "Checks if a key:value appears in the fuzzer_stats report"
    printf '%s\n' " "
    # The last sentence is emitted in two pieces that share one output line.
    printf '%s' "If \"value\" is numeric and \"precision\" is defined, checks if the stat "
    printf '%s\n' "printed by afl is value+/-precision."
  } >&2
  exit 1
}
# Main flow: parse options, look up the requested key in <out_dir>/fuzzer_stats
# and compare its value with the expected one.
#
# Options:
#   -o  output directory that contains the fuzzer_stats file (required)
#   -k  key to look up (required)
#   -v  expected value (required)
#   -p  allowed deviation for numeric values (optional, defaults to 0)
#
# Exit status:
#   0  the stat matches (message "OK: ..." on stderr)
#   1  bad usage, non-numeric precision, or the key was not found
#   2  non-numeric value that differs from the expected one
#   3  numeric value outside [v - p, v + p]
while getopts "o:k:v:p:" opt; do
  case "${opt}" in
    o)
      o=${OPTARG}
      ;;
    k)
      k=${OPTARG}
      ;;
    v)
      v=${OPTARG}
      ;;
    p)
      p=${OPTARG}
      ;;
    *)
      usage
      ;;
  esac
done

# Quote the expansions so values with spaces or glob characters are tested as
# a single word.
if [ -z "$o" ] || [ -z "$k" ] || [ -z "$v" ]; then usage; fi

# xargs to trim the surrounding whitespaces
stat_v=$( grep -- "$k" "$o"/fuzzer_stats | cut -d ":" -f 2 | xargs )
v=$( echo "$v" | xargs )

# The variable must be expanded here; testing the literal word "stat_v" is
# never empty, so a missing key would otherwise go unnoticed.
if [ -z "$stat_v" ]; then
  echo "ERROR: key $k not found in fuzzer_stats." 1>&2
  exit 1
fi

re_percent='^[0-9]+([.][0-9]+)?%$'
# if the argument is a number in percentage, get rid of the %
if [[ "$v" =~ $re_percent ]]; then v=${v%\%}; fi
if [[ "$stat_v" =~ $re_percent ]]; then stat_v=${stat_v%\%}; fi

re_numeric='^[0-9]+([.][0-9]+)?$'
# if either side is not a number, we check for strict equality
if ! [[ "$v" =~ $re_numeric ]] || ! [[ "$stat_v" =~ $re_numeric ]]; then
  if [ "$v" != "$stat_v" ]; then
    echo "ERROR: \"$k:$stat_v\" (should be $v)." 1>&2
    exit 2
  fi
else
  # An omitted precision means an exact numeric match.
  p=${p:-0}
  if ! [[ "$p" =~ $re_numeric ]]; then
    echo "ERROR: precision \"$p\" is not a number." 1>&2
    exit 1
  fi
  # checks if the stat reported by afl is in the range; awk is used because
  # re_numeric accepts decimals, which shell integer arithmetic cannot compare.
  if ! awk -v s="$stat_v" -v v="$v" -v p="$p" \
      'BEGIN { exit !((s + 0) >= (v - p) && (s + 0) <= (v + p)) }'; then
    echo "ERROR: key $k:$stat_v is out of correct range." 1>&2
    exit 3
  fi
fi
echo "OK: key $k:$stat_v" 1>&2

@ -1 +0,0 @@
Subproject commit 61037103ae3722c8060ff7082994836a794f978e

@ -0,0 +1,140 @@
// Shared compiler flags; the AFL modules in this file pull them in through
// their `defaults` list.
cc_defaults {
name: "afl-defaults",
cflags: [
"-funroll-loops",
// Silence warnings for this code base.
"-Wno-pointer-sign",
"-Wno-pointer-arith",
"-Wno-sign-compare",
"-Wno-unused-parameter",
"-Wno-unused-function",
"-Wno-format",
"-Wno-user-defined-warnings",
// Preprocessor definitions, including the BIN_PATH/DOC_PATH string macros.
"-DUSE_TRACE_PC=1",
"-DBIN_PATH=\"out/host/linux-x86/bin\"",
"-DDOC_PATH=\"out/host/linux-x86/shared/doc/afl\"",
"-D__USE_GNU",
],
}
// afl-fuzz: statically linked binary built from afl-fuzz.c, host build enabled.
cc_binary {
name: "afl-fuzz",
static_executable: true,
host_supported: true,
defaults: [
"afl-defaults",
],
srcs: [
"afl-fuzz.c",
],
}
// afl-showmap: statically linked binary built from afl-showmap.c, host build enabled.
cc_binary {
name: "afl-showmap",
static_executable: true,
host_supported: true,
defaults: [
"afl-defaults",
],
srcs: [
"afl-showmap.c",
],
}
// afl-tmin: statically linked binary built from afl-tmin.c, host build enabled.
cc_binary {
name: "afl-tmin",
static_executable: true,
host_supported: true,
defaults: [
"afl-defaults",
],
srcs: [
"afl-tmin.c",
],
}
// afl-analyze: statically linked binary built from afl-analyze.c, host build enabled.
cc_binary {
name: "afl-analyze",
static_executable: true,
host_supported: true,
defaults: [
"afl-defaults",
],
srcs: [
"afl-analyze.c",
],
}
// afl-gotcpu: statically linked binary built from afl-gotcpu.c, host build enabled.
cc_binary {
name: "afl-gotcpu",
static_executable: true,
host_supported: true,
defaults: [
"afl-defaults",
],
srcs: [
"afl-gotcpu.c",
],
}
// afl-clang-fast: host-only compiler wrapper built from llvm_mode/afl-clang-fast.c.
cc_binary_host {
name: "afl-clang-fast",
static_executable: true,
defaults: [
"afl-defaults",
],
// Extra definitions on top of afl-defaults: __ANDROID__ and the AFL_PATH
// string macro.
cflags: [
"-D__ANDROID__",
"-DAFL_PATH=\"out/host/linux-x86/lib64\"",
],
srcs: [
"llvm_mode/afl-clang-fast.c",
],
}
// afl-clang-fast++: host-only binary compiled from the same source file and
// with the same flags as afl-clang-fast; only the module name differs.
cc_binary_host {
name: "afl-clang-fast++",
static_executable: true,
defaults: [
"afl-defaults",
],
cflags: [
"-D__ANDROID__",
"-DAFL_PATH=\"out/host/linux-x86/lib64\"",
],
srcs: [
"llvm_mode/afl-clang-fast.c",
],
}
// afl-llvm-rt: static library built from llvm_mode/afl-llvm-rt.o.c. It is
// built for both multilib variants and is also enabled for host, vendor and
// recovery.
cc_library_static {
name: "afl-llvm-rt",
compile_multilib: "both",
vendor_available: true,
host_supported: true,
recovery_available: true,
defaults: [
"afl-defaults",
],
srcs: [
"llvm_mode/afl-llvm-rt.o.c",
],
}

@ -0,0 +1,28 @@
# How to Contribute
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
## Community Guidelines
This project follows [Google's Open Source Community
Guidelines](https://opensource.google.com/conduct/).

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1,153 @@
#
# american fuzzy lop - makefile
# -----------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
#
# Copyright 2013, 2014, 2015, 2016, 2017 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Project name, used by the publish target to build directory and tarball names.
PROGNAME = afl
# Version string: the double-quoted value on the "#define VERSION" line of config.h.
VERSION = $(shell grep '^\#define VERSION ' config.h | cut -d '"' -f2)
# Installation layout; PREFIX can be overridden by the caller.
PREFIX ?= /usr/local
BIN_PATH = $(PREFIX)/bin
HELPER_PATH = $(PREFIX)/lib/afl
DOC_PATH = $(PREFIX)/share/doc/afl
MISC_PATH = $(PREFIX)/share/afl
# PROGS intentionally omit afl-as, which gets installed elsewhere.
PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
# Scripts installed next to PROGS by the install target.
SH_PROGS = afl-plot afl-cmin afl-whatsup
# Optimisation flags are only a default; the flags appended below (warnings,
# debug info and the install paths as string macros) are always added.
CFLAGS ?= -O3 -funroll-loops
CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
-DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \
-DBIN_PATH=\"$(BIN_PATH)\"
# Link with -ldl when `uname` prints Linux or a name starting with GNU.
ifneq "$(filter Linux GNU%,$(shell uname))" ""
LDFLAGS += -ldl
endif
# Pick the wrapper used by test_build: afl-clang when `$(CC) --version`
# mentions clang, afl-gcc otherwise.
ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
TEST_CC = afl-gcc
else
TEST_CC = afl-clang
endif
# Headers listed as prerequisites of every tool built below.
COMM_HDR = alloc-inl.h config.h debug.h types.h
# Default target: compiler check, all tools, afl-as, the instrumentation
# self-test and the closing messages.
all: test_x86 $(PROGS) afl-as test_build all_done
# test_x86: unless AFL_NO_X86 is defined, compile a one-line program that
# contains x86 inline assembly. On failure an explanation is printed and the
# build stops with status 1.
ifndef AFL_NO_X86
test_x86:
@echo "[*] Checking for the ability to compile x86 code..."
@echo 'main() { __asm__("xorb %al, %al"); }' | $(CC) -w -x c - -o .test || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
@rm -f .test
@echo "[+] Everything seems to be working, ready to compile."
else
# With AFL_NO_X86 defined the check is replaced by a notice.
test_x86:
@echo "[!] Note: skipping x86 compilation checks (AFL_NO_X86 set)."
endif
# Each tool is compiled from its single .c file and depends on the common
# headers. test_x86 is an order-only prerequisite (after the `|`), so it runs
# first but does not by itself force a rebuild.
#
# afl-gcc also gets the symlinks afl-g++, afl-clang and afl-clang++.
afl-gcc: afl-gcc.c $(COMM_HDR) | test_x86
$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $$i; done
# afl-as is additionally symlinked under the name `as`.
afl-as: afl-as.c afl-as.h $(COMM_HDR) | test_x86
$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
ln -sf afl-as as
afl-fuzz: afl-fuzz.c $(COMM_HDR) | test_x86
$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
afl-showmap: afl-showmap.c $(COMM_HDR) | test_x86
$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
afl-tmin: afl-tmin.c $(COMM_HDR) | test_x86
$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
afl-analyze: afl-analyze.c $(COMM_HDR) | test_x86
$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
afl-gotcpu: afl-gotcpu.c $(COMM_HDR) | test_x86
$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
# test_build: self-test of the instrumentation. test-instr.c is compiled
# through the freshly built wrapper, then afl-showmap is run on it twice with
# different stdin (empty input, then "1"). The two output maps are compared;
# if they are identical the build fails with status 1.
ifndef AFL_NO_X86
test_build: afl-gcc afl-as afl-showmap
@echo "[*] Testing the CC wrapper and instrumentation output..."
unset AFL_USE_ASAN AFL_USE_MSAN; AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. ./$(TEST_CC) $(CFLAGS) test-instr.c -o test-instr $(LDFLAGS)
./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
echo 1 | ./afl-showmap -m none -q -o .test-instr1 ./test-instr
@rm -f test-instr
@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please ping <lcamtuf@google.com> to troubleshoot the issue."; echo; exit 1; fi
@echo "[+] All right, the instrumentation seems to be working!"
else
# With AFL_NO_X86 defined the tools are still built, but the self-test is
# replaced by a notice.
test_build: afl-gcc afl-as afl-showmap
@echo "[!] Note: skipping build tests (you may need to use LLVM or QEMU mode)."
endif
# all_done: closing messages printed after a successful build.
all_done: test_build
# Point to llvm_mode when a `clang` executable is found by `which`.
@if [ ! "`which clang 2>/dev/null`" = "" ]; then echo "[+] LLVM users: see llvm_mode/README.llvm for a faster alternative to afl-gcc."; fi
@echo "[+] All done! Be sure to review README - it's pretty short and useful."
# Performance warning shown only when `uname` prints Darwin.
@if [ "`uname`" = "Darwin" ]; then printf "\nWARNING: Fuzzing on MacOS X is slow because of the unusually high overhead of\nfork() on this OS. Consider using Linux or *BSD. You can also use VirtualBox\n(virtualbox.org) to put AFL inside a Linux or *BSD VM.\n\n"; fi
# The note is printed, wrapped in terminal colour escapes, only when stdout
# is a terminal (`tty <&1` succeeds).
@! tty <&1 >/dev/null || printf "\033[0;30mNOTE: If you can read this, your terminal probably uses white background.\nThis will make the UI hard to read. See docs/status_screen.txt for advice.\033[0m\n" 2>/dev/null
# clean: remove built tools, symlinks, temporary test files and the QEMU
# download/build directory, then run `clean` in the three sub-projects.
.NOTPARALLEL: clean
clean:
rm -f $(PROGS) afl-as as afl-g++ afl-clang afl-clang++ *.o *~ a.out core core.[1-9][0-9]* *.stackdump test .test test-instr .test-instr0 .test-instr1 qemu_mode/qemu-2.10.0.tar.bz2 afl-qemu-trace
rm -rf out_dir qemu_mode/qemu-2.10.0
$(MAKE) -C llvm_mode clean
$(MAKE) -C libdislocator clean
$(MAKE) -C libtokencap clean
# install: copy binaries, helpers, documentation and sample data below
# $DESTDIR. DESTDIR is written as $${DESTDIR}, so it is expanded by the shell
# from the environment rather than by make.
install: all
mkdir -p -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH)
rm -f $${DESTDIR}$(BIN_PATH)/afl-plot.sh
install -m 755 $(PROGS) $(SH_PROGS) $${DESTDIR}$(BIN_PATH)
# afl-as belongs in HELPER_PATH (installed further down), not in BIN_PATH.
rm -f $${DESTDIR}$(BIN_PATH)/afl-as
# Optional components are installed only if they have been built.
if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi
# Without AFL_TRACE_PC the LLVM pass (afl-llvm-pass.so) must exist too and is
# installed together with the runtime object; with AFL_TRACE_PC only the
# wrapper and the runtime object are required.
ifndef AFL_TRACE_PC
if [ -f afl-clang-fast -a -f afl-llvm-pass.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-pass.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
else
if [ -f afl-clang-fast -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
endif
if [ -f afl-llvm-rt-32.o ]; then set -e; install -m 755 afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi
if [ -f afl-llvm-rt-64.o ]; then set -e; install -m 755 afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi
# Recreate the compiler-wrapper symlinks in the installed bin directory.
set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/$$i; done
install -m 755 afl-as $${DESTDIR}$(HELPER_PATH)
ln -sf afl-as $${DESTDIR}$(HELPER_PATH)/as
install -m 644 README.md docs/ChangeLog docs/*.txt $${DESTDIR}$(DOC_PATH)
cp -r testcases/ $${DESTDIR}$(MISC_PATH)
cp -r dictionaries/ $${DESTDIR}$(MISC_PATH)
# publish: build a release tarball under ~/www/afl/releases and refresh the
# documentation copies under ~/www/afl.
publish: clean
# Only run from a directory whose name is exactly "AFL".
# NOTE(review): the copy below uses $(PROGNAME) ("afl") as the source
# directory name while this check requires "AFL" - confirm this works on
# case-sensitive filesystems.
test "`basename $$PWD`" = "AFL" || exit 1
# Refuse to overwrite a tarball that already exists for this version.
test -f ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz; if [ "$$?" = "0" ]; then echo; echo "Change program version in config.h, mmkay?"; echo; exit 1; fi
cd ..; rm -rf $(PROGNAME)-$(VERSION); cp -pr $(PROGNAME) $(PROGNAME)-$(VERSION); \
tar -cvz -f ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz $(PROGNAME)-$(VERSION)
chmod 644 ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz
# Point the "latest" symlink at the new tarball.
( cd ~/www/afl/releases/; ln -s -f $(PROGNAME)-$(VERSION).tgz $(PROGNAME)-latest.tgz )
# NOTE(review): this reads docs/README, whereas the install target copies
# README.md from the top directory - confirm docs/README exists.
cat docs/README >~/www/afl/README.txt
cat docs/status_screen.txt >~/www/afl/status_screen.txt
cat docs/historical_notes.txt >~/www/afl/historical_notes.txt
cat docs/technical_details.txt >~/www/afl/technical_details.txt
cat docs/ChangeLog >~/www/afl/ChangeLog.txt
cat docs/QuickStartGuide.txt >~/www/afl/QuickStartGuide.txt
echo -n "$(VERSION)" >~/www/afl/version.txt

@ -0,0 +1,493 @@
# american fuzzy lop
[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL)
Originally developed by Michal Zalewski <lcamtuf@google.com>.
See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read
this file.
## 1) Challenges of guided fuzzing
Fuzzing is one of the most powerful and proven strategies for identifying
security issues in real-world software; it is responsible for the vast
majority of remote code execution and privilege escalation bugs found to date
in security-critical software.
Unfortunately, fuzzing is also relatively shallow; blind, random mutations
make it very unlikely to reach certain code paths in the tested code, leaving
some vulnerabilities firmly outside the reach of this technique.
There have been numerous attempts to solve this problem. One of the early
approaches - pioneered by Tavis Ormandy - is corpus distillation. The method
relies on coverage signals to select a subset of interesting seeds from a
massive, high-quality corpus of candidate files, and then fuzz them by
traditional means. The approach works exceptionally well, but requires such
a corpus to be readily available. In addition, block coverage measurements
provide only a very simplistic understanding of program state, and are less
useful for guiding the fuzzing effort in the long haul.
Other, more sophisticated research has focused on techniques such as program
flow analysis ("concolic execution"), symbolic execution, or static analysis.
All these methods are extremely promising in experimental settings, but tend
to suffer from reliability and performance problems in practical uses - and
currently do not offer a viable alternative to "dumb" fuzzing techniques.
## 2) The afl-fuzz approach
American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
but rock-solid instrumentation-guided genetic algorithm. It uses a modified
form of edge coverage to effortlessly pick up subtle, local-scale changes to
program control flow.
Simplifying a bit, the overall algorithm can be summed up as:
1) Load user-supplied initial test cases into the queue,
2) Take next input file from the queue,
3) Attempt to trim the test case to the smallest size that doesn't alter
the measured behavior of the program,
4) Repeatedly mutate the file using a balanced and well-researched variety
of traditional fuzzing strategies,
5) If any of the generated mutations resulted in a new state transition
recorded by the instrumentation, add mutated output as a new entry in the
queue.
6) Go to 2.
The discovered test cases are also periodically culled to eliminate ones that
have been obsoleted by newer, higher-coverage finds; and undergo several other
instrumentation-driven effort minimization steps.
As a side result of the fuzzing process, the tool creates a small,
self-contained corpus of interesting test cases. These are extremely useful
for seeding other, labor- or resource-intensive testing regimes - for example,
for stress-testing browsers, office applications, graphics suites, or
closed-source tools.
The fuzzer is thoroughly tested to deliver out-of-the-box performance far
superior to blind fuzzing or coverage-only tools.
## 3) Instrumenting programs for use with AFL
When source code is available, instrumentation can be injected by a companion
tool that works as a drop-in replacement for gcc or clang in any standard build
process for third-party code.
The instrumentation has a fairly modest performance impact; in conjunction with
other optimizations implemented by afl-fuzz, most programs can be fuzzed as fast
or even faster than possible with traditional tools.
The correct way to recompile the target program may vary depending on the
specifics of the build process, but a nearly-universal approach would be:
```shell
$ CC=/path/to/afl/afl-gcc ./configure
$ make clean all
```
For C++ programs, you'd also want to set `CXX=/path/to/afl/afl-g++`.
The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
clang users may also opt to leverage a higher-performance instrumentation mode,
as described in llvm_mode/README.llvm.
When testing libraries, you need to find or write a simple program that reads
data from stdin or from a file and passes it to the tested library. In such a
case, it is essential to link this executable against a static version of the
instrumented library, or to make sure that the correct .so file is loaded at
runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
build, usually possible via:
```shell
$ CC=/path/to/afl/afl-gcc ./configure --disable-shared
```
Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
automatically enable code hardening options that make it easier to detect
simple memory bugs. Libdislocator, a helper library included with AFL (see
libdislocator/README.dislocator) can help uncover heap corruption issues, too.
PS. ASAN users are advised to review the [notes_for_asan.txt](docs/notes_for_asan.txt) file for important
caveats.
## 4) Instrumenting binary-only apps
When source code is *NOT* available, the fuzzer offers experimental support for
fast, on-the-fly instrumentation of black-box binaries. This is accomplished
with a version of QEMU running in the lesser-known "user space emulation" mode.
QEMU is a project separate from AFL, but you can conveniently build the
feature by doing:
```shell
$ cd qemu_mode
$ ./build_qemu_support.sh
```
For additional instructions and caveats, see qemu_mode/README.qemu.
The mode is approximately 2-5x slower than compile-time instrumentation, is
less conducive to parallelization, and may have some other quirks.
## 5) Choosing initial test cases
To operate correctly, the fuzzer requires one or more starting files that
contain a good example of the input data normally expected by the targeted
application. There are two basic rules:
- Keep the files small. Under 1 kB is ideal, although not strictly necessary.
For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt).
- Use multiple test cases only if they are functionally different from
each other. There is no point in using fifty different vacation photos
to fuzz an image library.
You can find many good examples of starting files in the testcases/ subdirectory
that comes with this tool.
PS. If a large corpus of data is available for screening, you may want to use
the afl-cmin utility to identify a subset of functionally distinct files that
exercise different code paths in the target binary.
## 6) Fuzzing binaries
The fuzzing process itself is carried out by the afl-fuzz utility. This program
requires a read-only directory with initial test cases, a separate place to
store its findings, plus a path to the binary to test.
For target binaries that accept input directly from stdin, the usual syntax is:
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
```
For programs that take input from a file, use '@@' to mark the location in
the target's command line where the input file name should be placed. The
fuzzer will substitute this for you:
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
```
You can also use the -f option to have the mutated data written to a specific
file. This is useful if the program expects a particular file extension or so.
Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command
line) or in a traditional, blind-fuzzer mode (specify -n).
You can use -t and -m to override the default timeout and memory limit for the
executed process; rare examples of targets that may need these settings touched
include compilers and video decoders.
Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt).
Note that afl-fuzz starts by performing an array of deterministic fuzzing
steps, which can take several days, but tend to produce neat test cases. If you
want quick & dirty results right away - akin to zzuf and other traditional
fuzzers - add the -d option to the command line.
## 7) Interpreting output
See the [status_screen.txt](docs/status_screen.txt) file for information on
how to interpret the displayed stats and monitor the health of the process.
Be sure to consult this file especially if any UI elements are highlighted in
red.
The fuzzing process will continue until you press Ctrl-C. At minimum, you want
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
couple of hours to a week or so.
There are three subdirectories created within the output directory and updated
in real time:
- queue/ - test cases for every distinctive execution path, plus all the
starting files given by the user. This is the synthesized corpus
mentioned in section 2.
Before using this corpus for any other purposes, you can shrink
it to a smaller size using the afl-cmin tool. The tool will find
a smaller subset of files offering equivalent edge coverage.
- crashes/ - unique test cases that cause the tested program to receive a
fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are
grouped by the received signal.
- hangs/ - unique test cases that cause the tested program to time out. The
default time limit before something is classified as a hang is
the larger of 1 second and the value of the -t parameter.
The value can be fine-tuned by setting AFL_HANG_TMOUT, but this
is rarely necessary.
Crashes and hangs are considered "unique" if the associated execution paths
involve any state transitions not seen in previously-recorded faults. If a
single bug can be reached in multiple ways, there will be some count inflation
early in the process, but this should quickly taper off.
The file names for crashes and hangs are correlated with parent, non-faulting
queue entries. This should help with debugging.
When you can't reproduce a crash found by afl-fuzz, the most likely cause is
that you are not setting the same memory limit as used by the tool. Try:
```shell
$ LIMIT_MB=50
$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
```
Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
also change -Sv to -Sd.
Any existing output directory can also be used to resume aborted jobs; try:
```shell
$ ./afl-fuzz -i- -o existing_output_dir [...etc...]
```
If you have gnuplot installed, you can also generate some pretty graphs for any
active fuzzing task using afl-plot. For an example of what this looks like,
see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
## 8) Parallelized fuzzing
Every instance of afl-fuzz takes up roughly one core. This means that on
multi-core systems, parallelization is necessary to fully utilize the hardware.
For tips on how to fuzz a common target on multiple cores or multiple networked
machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt).
The parallel fuzzing mode also offers a simple way for interfacing AFL to other
fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.
## 9) Fuzzer dictionaries
By default, the afl-fuzz mutation engine is optimized for compact data formats -
say, images, multimedia, compressed data, regular expression syntax, or shell
scripts. It is somewhat less suited for languages with particularly verbose and
redundant verbiage - notably including HTML, SQL, or JavaScript.
To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to
seed the fuzzing process with an optional dictionary of language keywords,
magic headers, or other special tokens associated with the targeted data type
-- and use that to reconstruct the underlying grammar on the go:
[http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html)
To use this feature, you first need to create a dictionary in one of the two
formats discussed in dictionaries/README.dictionaries; and then point the fuzzer
to it via the -x option in the command line.
(Several common dictionaries are already provided in that subdirectory, too.)
There is no way to provide more structured descriptions of the underlying
syntax, but the fuzzer will likely figure out some of this based on the
instrumentation feedback alone. This actually works in practice, say:
[http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html)
PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
existing syntax tokens in the input corpus by watching the instrumentation
very closely during deterministic byte flips. This works for some types of
parsers and grammars, but isn't nearly as good as the -x mode.
If a dictionary is really hard to come by, another option is to let AFL run
for a while, and then use the token capture library that comes as a companion
utility with AFL. For that, see libtokencap/README.tokencap.
## 10) Crash triage
The coverage-based grouping of crashes usually produces a small data set that
can be quickly triaged manually or with a very simple GDB or Valgrind script.
Every crash is also traceable to its parent non-crashing test case in the
queue, making it easier to diagnose faults.
Having said that, it's important to acknowledge that some fuzzing crashes can be
difficult to quickly evaluate for exploitability without a lot of debugging and
code analysis work. To assist with this task, afl-fuzz supports a very unique
"crash exploration" mode enabled with the -C flag.
In this mode, the fuzzer takes one or more crashing test cases as the input,
and uses its feedback-driven fuzzing strategies to very quickly enumerate all
code paths that can be reached in the program while keeping it in the
crashing state.
Mutations that do not result in a crash are rejected; so are any changes that
do not affect the execution path.
The output is a small corpus of files that can be very rapidly examined to see
what degree of control the attacker has over the faulting address, or whether
it is possible to get past an initial out-of-bounds read - and see what lies
beneath.
Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
can be operated in a very simple way:
```shell
$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
```
The tool works with crashing and non-crashing test cases alike. In the crash
mode, it will happily accept instrumented and non-instrumented binaries. In the
non-crashing mode, the minimizer relies on standard AFL instrumentation to make
the file simpler without altering the execution path.
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
afl-fuzz.
Another recent addition to AFL is the afl-analyze tool. It takes an input
file, attempts to sequentially flip bytes, and observes the behavior of the
tested program. It then color-codes the input based on which sections appear to
be critical, and which are not; while not bulletproof, it can often offer quick
insights into complex file formats. More info about its operation can be found
near the end of [technical_details.txt](docs/technical_details.txt).
## 11) Going beyond crashes
Fuzzing is a wonderful and underutilized technique for discovering non-crashing
design and implementation errors, too. Quite a few interesting bugs have been
found by modifying the target programs to call abort() when, say:
- Two bignum libraries produce different outputs when given the same
fuzzer-generated input,
- An image library produces different outputs when asked to decode the same
input image several times in a row,
- A serialization / deserialization library fails to produce stable outputs
when iteratively serializing and deserializing fuzzer-supplied data,
- A compression library produces an output inconsistent with the input file
when asked to compress and then decompress a particular blob.
Implementing these or similar sanity checks usually takes very little time;
if you are the maintainer of a particular package, you can make this code
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).
## 12) Common-sense risks
Please keep in mind that, similarly to many other computationally-intensive
tasks, fuzzing may put strain on your hardware and on the OS. In particular:
- Your CPU will run hot and will need adequate cooling. In most cases, if
cooling is insufficient or stops working properly, CPU speeds will be
automatically throttled. That said, especially when fuzzing on less
suitable hardware (laptops, smartphones, etc), it's not entirely impossible
for something to blow up.
- Targeted programs may end up erratically grabbing gigabytes of memory or
filling up disk space with junk files. AFL tries to enforce basic memory
limits, but can't prevent each and every possible mishap. The bottom line
is that you shouldn't be fuzzing on systems where the prospect of data loss
is not an acceptable risk.
- Fuzzing involves billions of reads and writes to the filesystem. On modern
systems, this will be usually heavily cached, resulting in fairly modest
"physical" I/O - but there are many factors that may alter this equation.
It is your responsibility to monitor for potential trouble; with very heavy
I/O, the lifespan of many HDDs and SSDs may be reduced.
A good way to monitor disk I/O on Linux is the 'iostat' command:
```shell
$ iostat -d 3 -x -k [...optional disk ID...]
```
## 13) Known limitations & areas for improvement
Here are some of the most important caveats for AFL:
- AFL detects faults by checking for the first spawned process dying due to
a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
these signals may need to have the relevant code commented out. In the same
vein, faults in child processes spawned by the fuzzed target may evade
detection unless you manually add some code to catch that.
- As with any other brute-force tool, the fuzzer offers limited coverage if
encryption, checksums, cryptographic signatures, or compression are used to
wholly wrap the actual data format to be tested.
To work around this, you can comment out the relevant checks (see
experimental/libpng_no_checksum/ for inspiration); if this is not possible,
you can also write a postprocessor, as explained in
experimental/post_library/.
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt)
for tips.
- There is no direct support for fuzzing network services, background
daemons, or interactive apps that require UI interaction to work. You may
need to make simple code changes to make them behave in a more traditional
way. Preeny may offer a relatively simple option, too - see:
https://github.com/zardus/preeny
Some useful tips for modifying network-based services can be also found at:
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
- AFL doesn't output human-readable coverage data. If you want to monitor
coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov
- Occasionally, sentient machines rise against their creators. If this
happens to you, please consult http://lcamtuf.coredump.cx/prep/.
Beyond this, see INSTALL for platform-specific tips.
## 14) Special thanks
Many of the improvements to afl-fuzz wouldn't be possible without feedback,
bug reports, or patches from:
```
Jann Horn Hanno Boeck
Felix Groebert Jakub Wilk
Richard W. M. Jones Alexander Cherepanov
Tom Ritter Hovik Manucharyan
Sebastian Roschke Eberhard Mattes
Padraig Brady Ben Laurie
@dronesec Luca Barbato
Tobias Ospelt Thomas Jarosch
Martin Carpenter Mudge Zatko
Joe Zbiciak Ryan Govostes
Michael Rash William Robinet
Jonathan Gray Filipe Cabecinhas
Nico Weber Jodie Cunningham
Andrew Griffiths Parker Thompson
Jonathan Neuschäfer Tyler Nighswander
Ben Nagy Samir Aguiar
Aidan Thornton Aleksandar Nikolich
Sam Hakim Laszlo Szekeres
David A. Wheeler Turo Lamminen
Andreas Stieger Richard Godbee
Louis Dassy teor2345
Alex Moneger Dmitry Vyukov
Keegan McAllister Kostya Serebryany
Richo Healey Martijn Bogaard
rc0r Jonathan Foote
Christian Holler Dominique Pelle
Jacek Wielemborek Leo Barnes
Jeremy Barnes Jeff Trull
Guillaume Endignoux ilovezfs
Daniel Godas-Lopez Franjo Ivancic
Austin Seipp Daniel Komaromy
Daniel Binderman Jonathan Metzman
Vegard Nossum Jan Kneschke
Kurt Roeckx Marcel Bohme
Van-Thuan Pham Abhik Roychoudhury
Joshua J. Drake Toby Hutton
Rene Freingruber Sergey Davidoff
Sami Liedes Craig Young
Andrzej Jackowski Daniel Hodson
```
Thank you!
## 15) Contact
Questions? Concerns? Bug reports? Please use GitHub.
There is also a mailing list for the project; to join, send a mail to
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse
archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users).

File diff suppressed because it is too large Load Diff

@ -0,0 +1,557 @@
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - wrapper for GNU as
---------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
The sole purpose of this wrapper is to preprocess assembly files generated
by GCC / clang and inject the instrumentation bits included from afl-as.h. It
is automatically invoked by the toolchain when compiling programs using
afl-gcc / afl-clang.
Note that it's an explicit non-goal to instrument hand-written assembly,
be it in separate .s files or in __asm__ blocks. The only aspiration this
utility has right now is to be able to skip them gracefully and allow the
compilation process to continue.
That said, see experimental/clang_asm_normalize/ for a solution that may
allow clang users to make things work even with hand-crafted assembly. Just
note that there is no equivalent for GCC.
*/
#define AFL_MAIN
#include "config.h"
#include "types.h"
#include "debug.h"
#include "alloc-inl.h"
#include "afl-as.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/time.h>
/* Argument vector that will be handed to the real 'as'. */
static u8** as_params;

/* Input file as named on our own command line. */
static u8* input_file;

/* Path of the instrumented copy that the real 'as' will assemble. */
static u8* modified_file;

/* Mode flags. */
static u8 be_quiet;                   /* Quiet mode (no stderr output)      */
static u8 clang_mode;                 /* Running in clang mode?             */
static u8 pass_thru;                  /* Just pass data through?            */
static u8 just_version;               /* Just show version?                 */
static u8 sanitizer;                  /* Using ASAN / MSAN                  */

/* Instrumentation probability, in percent. */
static u32 inst_ratio = 100;

/* Number of entries filled in as_params so far (slot 0 is the assembler). */
static u32 as_par_cnt = 1;

/* Unless --32 or --64 shows up on the command line, instrument for whichever
   word size this wrapper itself was built with. Not perfect, but good enough
   for almost all use cases. 32-bit builds are rejected outright on Apple. */

#if !defined(WORD_SIZE_64) && defined(__APPLE__)
#  error "Sorry, 32-bit Apple platforms are not supported."
#endif /* !WORD_SIZE_64 && __APPLE__ */

#ifdef WORD_SIZE_64
static u8 use_64bit = 1;
#else
static u8 use_64bit = 0;
#endif /* ^WORD_SIZE_64 */
/* Examine and modify parameters to pass to 'as'. Note that the file name
is always the last parameter passed by GCC, so we exploit this property
to keep the code simple. */
static void edit_params(int argc, char** argv) {
u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
u32 i;
#ifdef __APPLE__
u8 use_clang_as = 0;
/* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
with the code generated by newer versions of clang that are hand-built
by the user. See the thread here: http://goo.gl/HBWDtn.
To work around this, when using clang and running without AFL_AS
specified, we will actually call 'clang -c' instead of 'as -q' to
compile the assembly file.
The tools aren't cmdline-compatible, but at least for now, we can
seemingly get away with this by making only very minor tweaks. Thanks
to Nico Weber for the idea. */
if (clang_mode && !afl_as) {
use_clang_as = 1;
afl_as = getenv("AFL_CC");
if (!afl_as) afl_as = getenv("AFL_CXX");
if (!afl_as) afl_as = "clang";
}
#endif /* __APPLE__ */
/* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
is not set. We need to check these non-standard variables to properly
handle the pass_thru logic later on. */
if (!tmp_dir) tmp_dir = getenv("TEMP");
if (!tmp_dir) tmp_dir = getenv("TMP");
if (!tmp_dir) tmp_dir = "/tmp";
as_params = ck_alloc((argc + 32) * sizeof(u8*));
as_params[0] = afl_as ? afl_as : (u8*)"as";
as_params[argc] = 0;
for (i = 1; i < argc - 1; i++) {
if (!strcmp(argv[i], "--64")) use_64bit = 1;
else if (!strcmp(argv[i], "--32")) use_64bit = 0;
#ifdef __APPLE__
/* The Apple case is a bit different... */
if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
else if (!strcmp(argv[i + 1], "i386"))
FATAL("Sorry, 32-bit Apple platforms are not supported.");
}
/* Strip options that set the preference for a particular upstream
assembler in Xcode. */
if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
continue;
#endif /* __APPLE__ */
as_params[as_par_cnt++] = argv[i];
}
#ifdef __APPLE__
/* When calling clang as the upstream assembler, append -c -x assembler
and hope for the best. */
if (use_clang_as) {
as_params[as_par_cnt++] = "-c";
as_params[as_par_cnt++] = "-x";
as_params[as_par_cnt++] = "assembler";
}
#endif /* __APPLE__ */
input_file = argv[argc - 1];
if (input_file[0] == '-') {
if (!strcmp(input_file + 1, "-version")) {
just_version = 1;
modified_file = input_file;
goto wrap_things_up;
}
if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
else input_file = NULL;
} else {
/* Check if this looks like a standard invocation as a part of an attempt
to compile a program, rather than using gcc on an ad-hoc .s file in
a format we may not understand. This works around an issue compiling
NSS. */
if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
strncmp(input_file, "/var/tmp/", 9) &&
strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
}
modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
(u32)time(NULL));
wrap_things_up:
as_params[as_par_cnt++] = modified_file;
as_params[as_par_cnt] = NULL;
}
/* Process input file, generate modified_file. Insert instrumentation in all
the appropriate places.
Reads the assembly line by line from input_file (or from stdin when
input_file is NULL), copies every line to a freshly created modified_file,
and interleaves instrumentation trampolines after conditional branches and
at the start of the code following function / branch labels. If at least one
trampoline was written, the main payload is appended once at the end of the
output. In pass-thru mode the input is copied verbatim.
Takes no arguments and returns nothing; uses PFATAL if the input cannot be
opened or the output file cannot be created. */
static void add_instrumentation(void) {
static u8 line[MAX_LINE];
FILE* inf;
FILE* outf;
s32 outfd;
u32 ins_lines = 0;
/* Per-file parser state:
instr_ok - currently inside a text section
skip_csect - inside a .code32 / .code64 region that does not match
use_64bit
skip_next_label - do not instrument the next branch label (set by the
p2align heuristic below)
skip_intel - between .intel_syntax and .att_syntax
skip_app - between #APP and #NO_APP
instrument_next - a trampoline is pending and will be written before the
next instruction-looking line */
u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
skip_intel = 0, skip_app = 0, instrument_next = 0;
#ifdef __APPLE__
u8* colon_pos;
#endif /* __APPLE__ */
if (input_file) {
inf = fopen(input_file, "r");
if (!inf) PFATAL("Unable to read '%s'", input_file);
} else inf = stdin;
/* O_EXCL makes the open fail if the output file already exists; the file
is created with mode 0600. */
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
outf = fdopen(outfd, "w");
if (!outf) PFATAL("fdopen() failed");
while (fgets(line, MAX_LINE, inf)) {
/* In some cases, we want to defer writing the instrumentation trampoline
until after all the labels, macros, comments, etc. If we're in this
mode, and if the line starts with a tab followed by a letter, dump
the trampoline now. */
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
instrument_next && line[0] == '\t' && isalpha(line[1])) {
/* NOTE(review): R() is defined outside this chunk; presumably it yields a
random value below its argument, used here as the location ID. */
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
instrument_next = 0;
ins_lines++;
}
/* Output the actual line, call it a day in pass-thru mode. */
fputs(line, outf);
if (pass_thru) continue;
/* All right, this is where the actual fun begins. For one, we only want to
instrument the .text section. So, let's keep track of that in processed
files - and let's set instr_ok accordingly. */
if (line[0] == '\t' && line[1] == '.') {
/* OpenBSD puts jump tables directly inline with the code, which is
a bit annoying. They use a specific format of p2align directives
around them, so we use that as a signal. */
if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
/* Directives that switch to a text section enable instrumentation. */
if (!strncmp(line + 2, "text\n", 5) ||
!strncmp(line + 2, "section\t.text", 13) ||
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
!strncmp(line + 2, "section __TEXT,__text", 21)) {
instr_ok = 1;
continue;
}
/* Any other section directive, as well as .bss and .data, disables it.
This check must come after the text-section check above, because the
"section" prefixes here would also match the text variants. */
if (!strncmp(line + 2, "section\t", 8) ||
!strncmp(line + 2, "section ", 8) ||
!strncmp(line + 2, "bss\n", 4) ||
!strncmp(line + 2, "data\n", 5)) {
instr_ok = 0;
continue;
}
}
/* Detect off-flavor assembly (rare, happens in gdb). When this is
encountered, we set skip_csect until the opposite directive is
seen, and we do not instrument. */
if (strstr(line, ".code")) {
if (strstr(line, ".code32")) skip_csect = use_64bit;
if (strstr(line, ".code64")) skip_csect = !use_64bit;
}
/* Detect syntax changes, as could happen with hand-written assembly.
Skip Intel blocks, resume instrumentation when back to AT&T. */
if (strstr(line, ".intel_syntax")) skip_intel = 1;
if (strstr(line, ".att_syntax")) skip_intel = 0;
/* Detect ad-hoc __asm__ blocks, delimited by #APP / #NO_APP markers, and
skip instrumentation while inside them. */
if (line[0] == '#' || line[1] == '#') {
if (strstr(line, "#APP")) skip_app = 1;
if (strstr(line, "#NO_APP")) skip_app = 0;
}
/* If we're in the right mood for instrumenting, check for function
names or conditional labels. This is a bit messy, but in essence,
we want to catch:
^main: - function entry point (always instrumented)
^.L0: - GCC branch label
^.LBB0_0: - clang branch label (but only in clang mode)
^\tjnz foo - conditional branches
...but not:
^# BB#0: - clang comments
^ # BB#0: - ditto
^.Ltmp0: - clang non-branch labels
^.LC0 - GCC non-branch labels
^.LBB0_0: - ditto (when in GCC mode)
^\tjmp foo - non-conditional jumps
Additionally, clang and GCC on MacOS X follow a different convention
with no leading dots on labels, hence the weird maze of #ifdefs
later on.
*/
if (skip_intel || skip_app || skip_csect || !instr_ok ||
line[0] == '#' || line[0] == ' ') continue;
/* Conditional branch instruction (jnz, etc). We append the instrumentation
right after the branch (to instrument the not-taken path) and at the
branch destination label (handled later on). Any tab-indented mnemonic
starting with 'j' other than "jm..." (i.e. jmp) counts as conditional;
inst_ratio is the percentage of candidates that get instrumented. */
if (line[0] == '\t') {
if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
ins_lines++;
}
continue;
}
/* Label of some sort. This may be a branch destination, but we need to
tread carefully and account for several different formatting
conventions. Note that the opening braces of the next two "if"
statements live inside the #ifdef branches; both variants share the
closing braces further down. */
#ifdef __APPLE__
/* Apple: L<whatever><digit>: */
if ((colon_pos = strstr(line, ":"))) {
if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
#else
/* Everybody else: .L<whatever>: */
if (strstr(line, ":")) {
if (line[0] == '.') {
#endif /* __APPLE__ */
/* .L0: or LBB0_0: style jump destination */
#ifdef __APPLE__
/* Apple: L<num> / LBB<num> */
if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
&& R(100) < inst_ratio) {
#else
/* Everybody else: .L<num> / .LBB<num> */
if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
&& R(100) < inst_ratio) {
#endif /* __APPLE__ */
/* An optimization is possible here by adding the code only if the
label is mentioned in the code in contexts other than call / jmp.
That said, this complicates the code by requiring two-pass
processing (messy with stdin), and results in a speed gain
typically under 10%, because compilers are generally pretty good
about not generating spurious intra-function jumps.
We use deferred output chiefly to avoid disrupting
.Lfunc_begin0-style exception handling calculations (a problem on
MacOS X). */
/* skip_next_label is one-shot: it suppresses exactly one label and is
then cleared. */
if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
}
} else {
/* Function label (always instrumented, deferred mode). */
instrument_next = 1;
}
}
}
/* The shared payload is only needed if at least one trampoline was
written. */
if (ins_lines)
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
/* stdin is not ours to close. */
if (input_file) fclose(inf);
fclose(outf);
if (!be_quiet) {
if (!ins_lines) WARNF("No instrumentation targets found%s.",
pass_thru ? " (pass-thru mode)" : "");
else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
ins_lines, use_64bit ? "64" : "32",
getenv("AFL_HARDEN") ? "hardened" :
(sanitizer ? "ASAN/MSAN" : "non-hardened"),
inst_ratio);
}
}
/* Main entry point.

   Prints the banner (or switches to quiet mode), seeds the PRNG, hands the
   command line to edit_params(), validates AFL_INST_RATIO, guards against
   being re-invoked as our own 'as', runs add_instrumentation() unless
   just_version is set, and finally executes as_params[] in a child process.

   The exit status of this program mirrors the outcome of that child: its
   exit code if it exited normally, or 128 + signal number if it was killed
   by a signal.

   NOTE(review): edit_params() is defined elsewhere in this file; it
   presumably fills in as_params, modified_file and just_version - confirm. */

int main(int argc, char** argv) {

  s32 pid;
  u32 rand_seed;
  int status;
  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

  struct timeval tv;

  clang_mode = !!getenv(CLANG_ENV_VAR);

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
         "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
         "don't want to run this program directly.\n\n"
         "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
         "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
         "instrumenting every discovered branch.\n\n");

    exit(1);

  }

  /* The timezone argument of gettimeofday() is obsolete; POSIX leaves the
     behavior unspecified when it is not NULL, so don't pass one. */

  gettimeofday(&tv, NULL);

  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

  srandom(rand_seed);

  edit_params(argc, argv);

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

  }

  /* The marker variable is inherited by the child below; finding it already
     set means we ended up executing ourselves instead of the real 'as'. */

  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */

  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {

    sanitizer = 1;
    inst_ratio /= 3;

  }

  if (!just_version) add_instrumentation();

  if (!(pid = fork())) {

    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);

  /* WEXITSTATUS() is only meaningful when the child exited normally. If the
     assembler was killed by a signal, the macro would typically yield 0 and
     the caller would mistake the crash for success; report a non-zero,
     shell-style status instead. */

  if (WIFSIGNALED(status)) exit(128 + WTERMSIG(status));

  exit(WEXITSTATUS(status));

}

@ -0,0 +1,727 @@
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - injectable parts
-------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
Forkserver design by Jann Horn <jannhorn@googlemail.com>
This file houses the assembly-level instrumentation injected into fuzzed
programs. The instrumentation stores XORed pairs of data: identifiers of the
currently executing branch and the one that executed immediately before.
TL;DR: the instrumentation does shm_trace_map[cur_loc ^ prev_loc]++
The code is designed for 32-bit and 64-bit x86 systems. Both modes should
work everywhere except for Apple systems. Apple does relocations differently
from everybody else, so since their OSes have been 64-bit for a longer while,
I didn't go through the mental effort of porting the 32-bit code.
In principle, similar code should be easy to inject into any well-behaved
binary-only code (e.g., using DynamoRIO). Conditional jumps offer natural
targets for instrumentation, and should offer comparable probe density.
*/
#ifndef _HAVE_AFL_AS_H
#define _HAVE_AFL_AS_H
#include "config.h"
#include "types.h"
/*
-----------------
Performance notes
-----------------
Contributions to make this code faster are appreciated! Here are some
rough notes that may help with the task:
- Only the trampoline_fmt and the non-setup __afl_maybe_log code paths are
really worth optimizing; the setup / fork server stuff matters a lot less
and should be mostly just kept readable.
- We're aiming for modern CPUs with out-of-order execution and large
pipelines; the code mostly follows intuitive, human-readable
instruction ordering, because "textbook" manual reorderings make no
substantial difference.
- Interestingly, instrumented execution isn't a lot faster if we store a
variable pointer to the setup, log, or return routine and then do a reg
call from within trampoline_fmt. It does speed up non-instrumented
execution quite a bit, though, since that path just becomes
push-call-ret-pop.
- There is also not a whole lot to be gained by doing SHM attach at a
fixed address instead of retrieving __afl_area_ptr. Although it allows us
to have a shorter log routine inserted for conditional jumps and jump
labels (for a ~10% perf gain), there is a risk of bumping into other
allocations created by the program or by tools such as ASAN.
- popf is *awfully* slow, which is why we're doing the lahf / sahf +
overflow test trick. Unfortunately, this forces us to taint eax / rax, but
this dependency on a commonly-used register still beats the alternative of
using pushf / popf.
One possible optimization is to avoid touching flags by using a circular
buffer that stores just a sequence of current locations, with the XOR stuff
happening offline. Alas, this doesn't seem to have a huge impact:
https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ
- Preforking one child a bit sooner, and then waiting for the "go" command
from within the child, doesn't offer major performance gains; fork() seems
to be relatively inexpensive these days. Preforking multiple children does
help, but badly breaks the "~1 core per fuzzer" design, making it harder to
scale up. Maybe there is some middle ground.
Perhaps of note: in the 64-bit version for all platforms except for Apple,
the instrumentation is done slightly differently than on 32-bit, with
__afl_prev_loc and __afl_area_ptr being local to the object file (.lcomm),
rather than global (.comm). This is to avoid GOTRELPC lookups in the critical
code path, which AFAICT, are otherwise unavoidable if we want gcc -shared to
work; simple relocations between .bss and .text won't work on most 64-bit
platforms in such a case.
(Fun fact: on Apple systems, .lcomm can segfault the linker.)
The side effect is that state transitions are measured in a somewhat
different way, with previous tuple being recorded separately within the scope
of every .c file. This should have no impact in any practical sense.
Another side effect of this design is that getenv() will be called once per
every .o file when running in non-instrumented mode; and since getenv() tends
to be optimized in funny ways, we need to be very careful to save every
oddball register it may touch.
*/
/* printf()-style template for the 32-bit trampoline. It consumes a single
   unsigned argument, rendered with %08x, which becomes the immediate loaded
   into ecx before the call to __afl_maybe_log; every literal '%' is doubled
   because the text is a format string.

   The emitted code moves esp down by 16 bytes, stores edi, edx, ecx and eax
   in that area, makes the call, then reloads the four registers and moves
   esp back up. */
static const u8* trampoline_fmt_32 =
"\n"
"/* --- AFL TRAMPOLINE (32-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
"leal -16(%%esp), %%esp\n"
"movl %%edi, 0(%%esp)\n"
"movl %%edx, 4(%%esp)\n"
"movl %%ecx, 8(%%esp)\n"
"movl %%eax, 12(%%esp)\n"
"movl $0x%08x, %%ecx\n"
"call __afl_maybe_log\n"
"movl 12(%%esp), %%eax\n"
"movl 8(%%esp), %%ecx\n"
"movl 4(%%esp), %%edx\n"
"movl 0(%%esp), %%edi\n"
"leal 16(%%esp), %%esp\n"
"\n"
"/* --- END --- */\n"
"\n";
/* printf()-style template for the 64-bit trampoline. As with the 32-bit
   variant, it takes one unsigned argument (rendered with %08x) that is loaded
   into rcx before calling __afl_maybe_log, and literal '%' characters are
   doubled.

   The emitted code moves rsp down by 128+24 bytes, stores rdx, rcx and rax in
   the lowest 24 bytes, makes the call, then reloads the registers and moves
   rsp back up.

   NOTE(review): the extra 128 bytes presumably step over the stack red zone
   of the x86-64 System V ABI - confirm. */
static const u8* trampoline_fmt_64 =
"\n"
"/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
"leaq -(128+24)(%%rsp), %%rsp\n"
"movq %%rdx, 0(%%rsp)\n"
"movq %%rcx, 8(%%rsp)\n"
"movq %%rax, 16(%%rsp)\n"
"movq $0x%08x, %%rcx\n"
"call __afl_maybe_log\n"
"movq 16(%%rsp), %%rax\n"
"movq 8(%%rsp), %%rcx\n"
"movq 0(%%rsp), %%rdx\n"
"leaq (128+24)(%%rsp), %%rsp\n"
"\n"
"/* --- END --- */\n"
"\n";
/* Main 32-bit payload: the assembly text that defines __afl_maybe_log, the
   routine called by every 32-bit trampoline. Unlike the trampolines, this
   string is not a format template, so '%' appears undoubled.

   Labels, in order of appearance:

     __afl_maybe_log      - saves flags (lahf / seto) and jumps to __afl_setup
                            while __afl_area_ptr is still zero.
     __afl_store          - updates the map byte selected by ecx; the index is
                            XORed with __afl_prev_loc unless COVERAGE_ONLY is
                            defined, and SKIP_COUNTS swaps "incb" for
                            "orb $1".
     __afl_return         - restores flags (addb $127 / sahf) and returns.
     __afl_setup          - getenv(SHM_ENV_VAR), atoi(), shmat(); the mapped
                            address is stored in __afl_area_ptr.
     __afl_forkserver     - writes 4 bytes to fd FORKSRV_FD + 1, then loops:
                            read from FORKSRV_FD, fork, report the PID, wait
                            for the child, relay its status.
     __afl_fork_resume    - closes both fds, restores registers and continues
                            at __afl_store.
     __afl_die            - calls _exit.
     __afl_setup_abort    - increments __afl_setup_failure so that setup is
                            not attempted again, then returns.

   NOTE(review): __afl_die clears eax and calls _exit without pushing an
   argument; if arguments are passed on the stack here, the exit status is
   whatever happens to be on top of it - confirm this is a deliberate
   don't-care. */
static const u8* main_payload_32 =
"\n"
"/* --- AFL MAIN PAYLOAD (32-BIT) --- */\n"
"\n"
".text\n"
".att_syntax\n"
".code32\n"
".align 8\n"
"\n"
"__afl_maybe_log:\n"
"\n"
" lahf\n"
" seto %al\n"
"\n"
" /* Check if SHM region is already mapped. */\n"
"\n"
" movl __afl_area_ptr, %edx\n"
" testl %edx, %edx\n"
" je __afl_setup\n"
"\n"
"__afl_store:\n"
"\n"
" /* Calculate and store hit for the code location specified in ecx. There\n"
" is a double-XOR way of doing this without tainting another register,\n"
" and we use it on 64-bit systems; but it's slower for 32-bit ones. */\n"
"\n"
#ifndef COVERAGE_ONLY
" movl __afl_prev_loc, %edi\n"
" xorl %ecx, %edi\n"
" shrl $1, %ecx\n"
" movl %ecx, __afl_prev_loc\n"
#else
" movl %ecx, %edi\n"
#endif /* ^!COVERAGE_ONLY */
"\n"
#ifdef SKIP_COUNTS
" orb $1, (%edx, %edi, 1)\n"
#else
" incb (%edx, %edi, 1)\n"
#endif /* ^SKIP_COUNTS */
"\n"
"__afl_return:\n"
"\n"
" addb $127, %al\n"
" sahf\n"
" ret\n"
"\n"
".align 8\n"
"\n"
"__afl_setup:\n"
"\n"
" /* Do not retry setup if we had previous failures. */\n"
"\n"
" cmpb $0, __afl_setup_failure\n"
" jne __afl_return\n"
"\n"
" /* Map SHM, jumping to __afl_setup_abort if something goes wrong.\n"
" We do not save FPU/MMX/SSE registers here, but hopefully, nobody\n"
" will notice this early in the game. */\n"
"\n"
" pushl %eax\n"
" pushl %ecx\n"
"\n"
" pushl $.AFL_SHM_ENV\n"
" call getenv\n"
" addl $4, %esp\n"
"\n"
" testl %eax, %eax\n"
" je __afl_setup_abort\n"
"\n"
" pushl %eax\n"
" call atoi\n"
" addl $4, %esp\n"
"\n"
" pushl $0 /* shmat flags */\n"
" pushl $0 /* requested addr */\n"
" pushl %eax /* SHM ID */\n"
" call shmat\n"
" addl $12, %esp\n"
"\n"
" cmpl $-1, %eax\n"
" je __afl_setup_abort\n"
"\n"
" /* Store the address of the SHM region. */\n"
"\n"
" movl %eax, __afl_area_ptr\n"
" movl %eax, %edx\n"
"\n"
" popl %ecx\n"
" popl %eax\n"
"\n"
"__afl_forkserver:\n"
"\n"
" /* Enter the fork server mode to avoid the overhead of execve() calls. */\n"
"\n"
" pushl %eax\n"
" pushl %ecx\n"
" pushl %edx\n"
"\n"
" /* Phone home and tell the parent that we're OK. (Note that signals with\n"
" no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
" closed because we were execve()d from an instrumented binary, or because\n"
" the parent doesn't want to use the fork server. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" cmpl $4, %eax\n"
" jne __afl_fork_resume\n"
"\n"
"__afl_fork_wait_loop:\n"
"\n"
" /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY(FORKSRV_FD) " /* file desc */\n"
" call read\n"
" addl $12, %esp\n"
"\n"
" cmpl $4, %eax\n"
" jne __afl_die\n"
"\n"
" /* Once woken up, create a clone of our process. This is an excellent use\n"
" case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
" caches getpid() results and offers no way to update the value, breaking\n"
" abort(), raise(), and a bunch of other things :-( */\n"
"\n"
" call fork\n"
"\n"
" cmpl $0, %eax\n"
" jl __afl_die\n"
" je __afl_fork_resume\n"
"\n"
" /* In parent process: write PID to pipe, then wait for child. */\n"
"\n"
" movl %eax, __afl_fork_pid\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_fork_pid /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" pushl $0 /* no flags */\n"
" pushl $__afl_temp /* status */\n"
" pushl __afl_fork_pid /* PID */\n"
" call waitpid\n"
" addl $12, %esp\n"
"\n"
" cmpl $0, %eax\n"
" jle __afl_die\n"
"\n"
" /* Relay wait status to pipe, then loop back. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" jmp __afl_fork_wait_loop\n"
"\n"
"__afl_fork_resume:\n"
"\n"
" /* In child process: close fds, resume execution. */\n"
"\n"
" pushl $" STRINGIFY(FORKSRV_FD) "\n"
" call close\n"
"\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) "\n"
" call close\n"
"\n"
" addl $8, %esp\n"
"\n"
" popl %edx\n"
" popl %ecx\n"
" popl %eax\n"
" jmp __afl_store\n"
"\n"
"__afl_die:\n"
"\n"
" xorl %eax, %eax\n"
" call _exit\n"
"\n"
"__afl_setup_abort:\n"
"\n"
" /* Record setup failure so that we don't keep calling\n"
" shmget() / shmat() over and over again. */\n"
"\n"
" incb __afl_setup_failure\n"
" popl %ecx\n"
" popl %eax\n"
" jmp __afl_return\n"
"\n"
".AFL_VARS:\n"
"\n"
" .comm __afl_area_ptr, 4, 32\n"
" .comm __afl_setup_failure, 1, 32\n"
#ifndef COVERAGE_ONLY
" .comm __afl_prev_loc, 4, 32\n"
#endif /* !COVERAGE_ONLY */
" .comm __afl_fork_pid, 4, 32\n"
" .comm __afl_temp, 4, 32\n"
"\n"
".AFL_SHM_ENV:\n"
" .asciz \"" SHM_ENV_VAR "\"\n"
"\n"
"/* --- END --- */\n"
"\n";
/* The OpenBSD hack is due to lahf and sahf not being recognized by some
versions of binutils: http://marc.info/?l=openbsd-cvs&m=141636589924400
The Apple code is a bit different when calling libc functions because
they are doing relocations differently from everybody else. We also need
to work around the crash issue with .lcomm and the fact that they don't
recognize .string. */
/* CALL_L64(str) expands to a string literal holding one "call" instruction
   for the libc function named by the string literal str: the symbol gets a
   leading underscore on Apple systems and an "@PLT" suffix everywhere
   else. It is meant to be spliced between the pieces of a larger string. */
#ifdef __APPLE__
# define CALL_L64(str) "call _" str "\n"
#else
# define CALL_L64(str) "call " str "@PLT\n"
#endif /* ^__APPLE__ */
/* Main 64-bit payload: the assembly text that defines __afl_maybe_log, the
   routine called by every 64-bit trampoline. This string is not a format
   template, so '%' appears undoubled. libc calls go through CALL_L64() and
   variables are addressed relative to rip.

   Labels, in order of appearance:

     __afl_maybe_log      - saves flags (lahf / seto; lahf is emitted as a raw
                            byte on OpenBSD and FreeBSD < 9) and jumps to
                            __afl_setup while __afl_area_ptr is still zero.
     __afl_store          - unless COVERAGE_ONLY is defined, combines rcx
                            with __afl_prev_loc using two XORs and a shift;
                            then updates the map byte at rdx + rcx ("orb $1"
                            under SKIP_COUNTS, "incb" otherwise).
     __afl_return         - restores flags (addb $127 / sahf) and returns.
     __afl_setup          - reuses __afl_global_area_ptr if it is already
                            non-zero, otherwise continues below.
     __afl_setup_first    - reserves 352 bytes of stack, saves rax, rcx, rdi,
                            rsi, r8-r11 and xmm0-xmm15, aligns the stack with
                            the old rsp kept in r12, then getenv(SHM_ENV_VAR),
                            atoi(), shmat(); the address is stored in both
                            __afl_area_ptr and __afl_global_area_ptr.
     __afl_forkserver     - writes 4 bytes to fd FORKSRV_FD + 1, then loops:
                            read from FORKSRV_FD, fork, report the PID, wait
                            for the child, relay its status.
     __afl_fork_resume    - closes both fds, restores the saved registers and
                            continues at __afl_store.
     __afl_die            - calls _exit.
     __afl_setup_abort    - increments __afl_setup_failure, restores the
                            saved registers and jumps to __afl_return.

   NOTE(review): __afl_die clears rax, not rdi, before calling _exit; if the
   first argument is passed in rdi, the exit status is whatever that register
   held - confirm this is a deliberate don't-care.

   NOTE(review): the waitpid() setup loads the PID with an 8-byte movq from
   __afl_fork_pid, which is declared with a size of 4 - presumably harmless
   because only the low 32 bits are consumed; confirm. */
static const u8* main_payload_64 =
"\n"
"/* --- AFL MAIN PAYLOAD (64-BIT) --- */\n"
"\n"
".text\n"
".att_syntax\n"
".code64\n"
".align 8\n"
"\n"
"__afl_maybe_log:\n"
"\n"
#if defined(__OpenBSD__) || (defined(__FreeBSD__) && (__FreeBSD__ < 9))
" .byte 0x9f /* lahf */\n"
#else
" lahf\n"
#endif /* ^__OpenBSD__, etc */
" seto %al\n"
"\n"
" /* Check if SHM region is already mapped. */\n"
"\n"
" movq __afl_area_ptr(%rip), %rdx\n"
" testq %rdx, %rdx\n"
" je __afl_setup\n"
"\n"
"__afl_store:\n"
"\n"
" /* Calculate and store hit for the code location specified in rcx. */\n"
"\n"
#ifndef COVERAGE_ONLY
" xorq __afl_prev_loc(%rip), %rcx\n"
" xorq %rcx, __afl_prev_loc(%rip)\n"
" shrq $1, __afl_prev_loc(%rip)\n"
#endif /* ^!COVERAGE_ONLY */
"\n"
#ifdef SKIP_COUNTS
" orb $1, (%rdx, %rcx, 1)\n"
#else
" incb (%rdx, %rcx, 1)\n"
#endif /* ^SKIP_COUNTS */
"\n"
"__afl_return:\n"
"\n"
" addb $127, %al\n"
#if defined(__OpenBSD__) || (defined(__FreeBSD__) && (__FreeBSD__ < 9))
" .byte 0x9e /* sahf */\n"
#else
" sahf\n"
#endif /* ^__OpenBSD__, etc */
" ret\n"
"\n"
".align 8\n"
"\n"
"__afl_setup:\n"
"\n"
" /* Do not retry setup if we had previous failures. */\n"
"\n"
" cmpb $0, __afl_setup_failure(%rip)\n"
" jne __afl_return\n"
"\n"
" /* Check out if we have a global pointer on file. */\n"
"\n"
#ifndef __APPLE__
" movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
" movq (%rdx), %rdx\n"
#else
" movq __afl_global_area_ptr(%rip), %rdx\n"
#endif /* !^__APPLE__ */
" testq %rdx, %rdx\n"
" je __afl_setup_first\n"
"\n"
" movq %rdx, __afl_area_ptr(%rip)\n"
" jmp __afl_store\n"
"\n"
"__afl_setup_first:\n"
"\n"
" /* Save everything that is not yet saved and that may be touched by\n"
" getenv() and several other libcalls we'll be relying on. */\n"
"\n"
" leaq -352(%rsp), %rsp\n"
"\n"
" movq %rax, 0(%rsp)\n"
" movq %rcx, 8(%rsp)\n"
" movq %rdi, 16(%rsp)\n"
" movq %rsi, 32(%rsp)\n"
" movq %r8, 40(%rsp)\n"
" movq %r9, 48(%rsp)\n"
" movq %r10, 56(%rsp)\n"
" movq %r11, 64(%rsp)\n"
"\n"
" movq %xmm0, 96(%rsp)\n"
" movq %xmm1, 112(%rsp)\n"
" movq %xmm2, 128(%rsp)\n"
" movq %xmm3, 144(%rsp)\n"
" movq %xmm4, 160(%rsp)\n"
" movq %xmm5, 176(%rsp)\n"
" movq %xmm6, 192(%rsp)\n"
" movq %xmm7, 208(%rsp)\n"
" movq %xmm8, 224(%rsp)\n"
" movq %xmm9, 240(%rsp)\n"
" movq %xmm10, 256(%rsp)\n"
" movq %xmm11, 272(%rsp)\n"
" movq %xmm12, 288(%rsp)\n"
" movq %xmm13, 304(%rsp)\n"
" movq %xmm14, 320(%rsp)\n"
" movq %xmm15, 336(%rsp)\n"
"\n"
" /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */\n"
"\n"
" /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the\n"
" original stack ptr in the callee-saved r12. */\n"
"\n"
" pushq %r12\n"
" movq %rsp, %r12\n"
" subq $16, %rsp\n"
" andq $0xfffffffffffffff0, %rsp\n"
"\n"
" leaq .AFL_SHM_ENV(%rip), %rdi\n"
CALL_L64("getenv")
"\n"
" testq %rax, %rax\n"
" je __afl_setup_abort\n"
"\n"
" movq %rax, %rdi\n"
CALL_L64("atoi")
"\n"
" xorq %rdx, %rdx /* shmat flags */\n"
" xorq %rsi, %rsi /* requested addr */\n"
" movq %rax, %rdi /* SHM ID */\n"
CALL_L64("shmat")
"\n"
" cmpq $-1, %rax\n"
" je __afl_setup_abort\n"
"\n"
" /* Store the address of the SHM region. */\n"
"\n"
" movq %rax, %rdx\n"
" movq %rax, __afl_area_ptr(%rip)\n"
"\n"
#ifdef __APPLE__
" movq %rax, __afl_global_area_ptr(%rip)\n"
#else
" movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
" movq %rax, (%rdx)\n"
#endif /* ^__APPLE__ */
" movq %rax, %rdx\n"
"\n"
"__afl_forkserver:\n"
"\n"
" /* Enter the fork server mode to avoid the overhead of execve() calls. We\n"
" push rdx (area ptr) twice to keep stack alignment neat. */\n"
"\n"
" pushq %rdx\n"
" pushq %rdx\n"
"\n"
" /* Phone home and tell the parent that we're OK. (Note that signals with\n"
" no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
" closed because we were execve()d from an instrumented binary, or because\n"
" the parent doesn't want to use the fork server. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" cmpq $4, %rax\n"
" jne __afl_fork_resume\n"
"\n"
"__afl_fork_wait_loop:\n"
"\n"
" /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY(FORKSRV_FD) ", %rdi /* file desc */\n"
CALL_L64("read")
" cmpq $4, %rax\n"
" jne __afl_die\n"
"\n"
" /* Once woken up, create a clone of our process. This is an excellent use\n"
" case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
" caches getpid() results and offers no way to update the value, breaking\n"
" abort(), raise(), and a bunch of other things :-( */\n"
"\n"
CALL_L64("fork")
" cmpq $0, %rax\n"
" jl __afl_die\n"
" je __afl_fork_resume\n"
"\n"
" /* In parent process: write PID to pipe, then wait for child. */\n"
"\n"
" movl %eax, __afl_fork_pid(%rip)\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_fork_pid(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" movq $0, %rdx /* no flags */\n"
" leaq __afl_temp(%rip), %rsi /* status */\n"
" movq __afl_fork_pid(%rip), %rdi /* PID */\n"
CALL_L64("waitpid")
" cmpq $0, %rax\n"
" jle __afl_die\n"
"\n"
" /* Relay wait status to pipe, then loop back. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" jmp __afl_fork_wait_loop\n"
"\n"
"__afl_fork_resume:\n"
"\n"
" /* In child process: close fds, resume execution. */\n"
"\n"
" movq $" STRINGIFY(FORKSRV_FD) ", %rdi\n"
CALL_L64("close")
"\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi\n"
CALL_L64("close")
"\n"
" popq %rdx\n"
" popq %rdx\n"
"\n"
" movq %r12, %rsp\n"
" popq %r12\n"
"\n"
" movq 0(%rsp), %rax\n"
" movq 8(%rsp), %rcx\n"
" movq 16(%rsp), %rdi\n"
" movq 32(%rsp), %rsi\n"
" movq 40(%rsp), %r8\n"
" movq 48(%rsp), %r9\n"
" movq 56(%rsp), %r10\n"
" movq 64(%rsp), %r11\n"
"\n"
" movq 96(%rsp), %xmm0\n"
" movq 112(%rsp), %xmm1\n"
" movq 128(%rsp), %xmm2\n"
" movq 144(%rsp), %xmm3\n"
" movq 160(%rsp), %xmm4\n"
" movq 176(%rsp), %xmm5\n"
" movq 192(%rsp), %xmm6\n"
" movq 208(%rsp), %xmm7\n"
" movq 224(%rsp), %xmm8\n"
" movq 240(%rsp), %xmm9\n"
" movq 256(%rsp), %xmm10\n"
" movq 272(%rsp), %xmm11\n"
" movq 288(%rsp), %xmm12\n"
" movq 304(%rsp), %xmm13\n"
" movq 320(%rsp), %xmm14\n"
" movq 336(%rsp), %xmm15\n"
"\n"
" leaq 352(%rsp), %rsp\n"
"\n"
" jmp __afl_store\n"
"\n"
"__afl_die:\n"
"\n"
" xorq %rax, %rax\n"
CALL_L64("_exit")
"\n"
"__afl_setup_abort:\n"
"\n"
" /* Record setup failure so that we don't keep calling\n"
" shmget() / shmat() over and over again. */\n"
"\n"
" incb __afl_setup_failure(%rip)\n"
"\n"
" movq %r12, %rsp\n"
" popq %r12\n"
"\n"
" movq 0(%rsp), %rax\n"
" movq 8(%rsp), %rcx\n"
" movq 16(%rsp), %rdi\n"
" movq 32(%rsp), %rsi\n"
" movq 40(%rsp), %r8\n"
" movq 48(%rsp), %r9\n"
" movq 56(%rsp), %r10\n"
" movq 64(%rsp), %r11\n"
"\n"
" movq 96(%rsp), %xmm0\n"
" movq 112(%rsp), %xmm1\n"
" movq 128(%rsp), %xmm2\n"
" movq 144(%rsp), %xmm3\n"
" movq 160(%rsp), %xmm4\n"
" movq 176(%rsp), %xmm5\n"
" movq 192(%rsp), %xmm6\n"
" movq 208(%rsp), %xmm7\n"
" movq 224(%rsp), %xmm8\n"
" movq 240(%rsp), %xmm9\n"
" movq 256(%rsp), %xmm10\n"
" movq 272(%rsp), %xmm11\n"
" movq 288(%rsp), %xmm12\n"
" movq 304(%rsp), %xmm13\n"
" movq 320(%rsp), %xmm14\n"
" movq 336(%rsp), %xmm15\n"
"\n"
" leaq 352(%rsp), %rsp\n"
"\n"
" jmp __afl_return\n"
"\n"
".AFL_VARS:\n"
"\n"
#ifdef __APPLE__
" .comm __afl_area_ptr, 8\n"
#ifndef COVERAGE_ONLY
" .comm __afl_prev_loc, 8\n"
#endif /* !COVERAGE_ONLY */
" .comm __afl_fork_pid, 4\n"
" .comm __afl_temp, 4\n"
" .comm __afl_setup_failure, 1\n"
#else
" .lcomm __afl_area_ptr, 8\n"
#ifndef COVERAGE_ONLY
" .lcomm __afl_prev_loc, 8\n"
#endif /* !COVERAGE_ONLY */
" .lcomm __afl_fork_pid, 4\n"
" .lcomm __afl_temp, 4\n"
" .lcomm __afl_setup_failure, 1\n"
#endif /* ^__APPLE__ */
" .comm __afl_global_area_ptr, 8, 8\n"
"\n"
".AFL_SHM_ENV:\n"
" .asciz \"" SHM_ENV_VAR "\"\n"
"\n"
"/* --- END --- */\n"
"\n";
#endif /* !_HAVE_AFL_AS_H */

@ -0,0 +1,461 @@
#!/usr/bin/env bash
#
# american fuzzy lop - corpus minimization tool
# ---------------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
#
# Copyright 2014, 2015 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This tool tries to find the smallest subset of files in the input directory
# that still trigger the full range of instrumentation data points seen in
# the starting corpus. This has two uses:
#
# - Screening large corpora of input files before using them as a seed for
# afl-fuzz. The tool will remove functionally redundant files and likely
# leave you with a much smaller set.
#
# (In this case, you probably also want to consider running afl-tmin on
# the individual files later on to reduce their size.)
#
# - Minimizing the corpus generated organically by afl-fuzz, perhaps when
# planning to feed it to more resource-intensive tools. The tool achieves
# this by removing all entries that used to trigger unique behaviors in the
# past, but have been made obsolete by later finds.
#
# Note that the tool doesn't modify the files themselves. For that, you want
# afl-tmin.
#
# This script must use bash because other shells may have hardcoded limits on
# array sizes.
#
echo "corpus minimization tool for afl-fuzz by <lcamtuf@google.com>"
echo

#########
# SETUP #
#########

# Defaults; both can be overridden from the command line (-m / -t).

MEM_LIMIT=100
TIMEOUT=none

# Drop anything inherited from the environment for the variables that are
# driven purely by command-line switches.

unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE

# Process command-line options...

while getopts "+i:o:f:m:t:eQC" opt; do

  case "$opt" in

    i) IN_DIR="$OPTARG" ;;
    o) OUT_DIR="$OPTARG" ;;
    f) STDIN_FILE="$OPTARG" ;;
    t) TIMEOUT="$OPTARG" ;;

    m)
      MEM_LIMIT="$OPTARG"
      MEM_LIMIT_GIVEN=1
      ;;

    # -e and -Q are collected in EXTRA_PAR and passed on to afl-showmap.
    e) EXTRA_PAR="$EXTRA_PAR -e" ;;

    C) export AFL_CMIN_CRASHES_ONLY=1 ;;

    Q)
      EXTRA_PAR="$EXTRA_PAR -Q"
      # Switch to the larger default limit unless -m was already seen.
      if [ -z "$MEM_LIMIT_GIVEN" ]; then
        MEM_LIMIT=250
      fi
      QEMU_MODE=1
      ;;

    # getopts reports an unrecognized switch as a literal "?".
    \?) exit 1 ;;

  esac

done

# Whatever follows the switches is the target command line.

shift $((OPTIND-1))

TARGET_BIN="$1"

if [ -z "$TARGET_BIN" ] || [ -z "$IN_DIR" ] || [ -z "$OUT_DIR" ]; then

  cat 1>&2 <<_EOF_
Usage: $0 [ options ] -- /path/to/target_app [ ... ]
Required parameters:
-i dir - input directory with the starting corpus
-o dir - output directory for minimized files
Execution control settings:
-f file - location read by the fuzzed program (stdin)
-m megs - memory limit for child process ($MEM_LIMIT MB)
-t msec - run time limit for child process (none)
-Q - use binary-only instrumentation (QEMU mode)
Minimization settings:
-C - keep crashing inputs, reject everything else
-e - solve for edge coverage only, ignore hit counts
For additional tips, please consult docs/README.
_EOF_

  exit 1

fi
# Do a sanity check to discourage the use of /tmp, since we can't really
# handle this safely from a shell script. The check can be bypassed by
# setting AFL_ALLOW_TMP.
#
# A path is rejected when it is /tmp or /var/tmp itself, or anything
# underneath. (Requiring a trailing slash would let e.g. PWD=/tmp through.)

if [ "$AFL_ALLOW_TMP" = "" ]; then

  tmp_re='^(/var)?/tmp(/|$)'

  for checked_path in "$IN_DIR" "$TARGET_BIN" "$OUT_DIR" "$STDIN_FILE" "$PWD"; do

    if [[ "$checked_path" =~ $tmp_re ]]; then
      echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
      exit 1
    fi

  done

fi
# Traces and scratch files live in a hidden directory below the output dir.

TRACE_DIR="$OUT_DIR/.traces"

# If @@ is specified, but there's no -f, let's come up with a temporary input
# file name.

if [ -z "$STDIN_FILE" ] && [[ "$*" == *'@@'* ]]; then
  STDIN_FILE="$TRACE_DIR/.cur_input"
fi
# Check for obvious errors.
#
# Both limits accept the literal "none". Anything else is compared
# numerically with the classic "[" builtin; a value it cannot parse as an
# integer makes the comparison fail (with a diagnostic) rather than abort.

if [ "$MEM_LIMIT" != "none" ] && [ "$MEM_LIMIT" -lt "5" ]; then
  echo "[-] Error: dangerously low memory limit." 1>&2
  exit 1
fi

if [ "$TIMEOUT" != "none" ] && [ "$TIMEOUT" -lt "10" ]; then
  echo "[-] Error: dangerously low timeout." 1>&2
  exit 1
fi
# Make sure the target exists and is executable. If the name as given is not
# a usable file, fall back to a PATH lookup.
#
# The lookup uses bash's own "type -P" rather than the external "which"
# utility, which is not guaranteed to be installed.

if [ ! -f "$TARGET_BIN" ] || [ ! -x "$TARGET_BIN" ]; then

  TNEW="$(type -P -- "$TARGET_BIN" 2>/dev/null)"

  if [ ! -f "$TNEW" ] || [ ! -x "$TNEW" ]; then
    echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
    exit 1
  fi

  TARGET_BIN="$TNEW"

fi

# Unless told otherwise (AFL_SKIP_BIN_CHECK, or QEMU mode), insist that the
# binary contains the __AFL_SHM_ID marker string.

if [ -z "$AFL_SKIP_BIN_CHECK" ] && [ -z "$QEMU_MODE" ]; then

  if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
    echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
    exit 1
  fi

fi
# The input directory must exist.

if ! [ -d "$IN_DIR" ]; then
  echo "[-] Error: directory '$IN_DIR' not found." 1>&2
  exit 1
fi

# If it has a queue/ subdirectory, use that instead (presumably so that an
# afl-fuzz output directory can be passed as-is).

if [ -d "$IN_DIR/queue" ]; then
  IN_DIR="$IN_DIR/queue"
fi

# Clear out leftovers of an earlier run (id:* / id_* files and the trace
# directory), then try to remove the output directory itself. rmdir only
# succeeds on an empty directory, so anything else in there keeps it alive
# and triggers the error below.

find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
rm -rf "$TRACE_DIR" 2>/dev/null
rmdir "$OUT_DIR" 2>/dev/null

if [ -d "$OUT_DIR" ]; then
  echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
  exit 1
fi

mkdir -m 700 -p "$TRACE_DIR" || exit 1

# When input is passed via a file, start from a fresh, empty one.

if [ -n "$STDIN_FILE" ]; then
  rm -f "$STDIN_FILE" || exit 1
  touch "$STDIN_FILE" || exit 1
fi
# Locate afl-showmap. AFL_PATH wins when set; otherwise look next to this
# script.
#
# The directory is derived by stripping the last path component of $0, so it
# does not matter what the script file is called. If $0 has no directory part
# at all (e.g. "bash afl-cmin"), fall back to the current directory.

if [ "$AFL_PATH" = "" ]; then

  case "$0" in
    */*) SHOWMAP="${0%/*}/afl-showmap" ;;
    *)   SHOWMAP="./afl-showmap" ;;
  esac

else

  SHOWMAP="$AFL_PATH/afl-showmap"

fi

if [ ! -x "$SHOWMAP" ]; then
  echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
  rm -rf "$TRACE_DIR"
  exit 1
fi
# Count the entries of the input directory. Wrapping the result in $(( ))
# turns whatever wc prints into a plain integer.

IN_COUNT=$(( $(ls -- "$IN_DIR" 2>/dev/null | wc -l) ))

if [ "$IN_COUNT" = "0" ]; then
  echo "[+] Hmm, no inputs in the target directory. Nothing to be done."
  rm -rf "$TRACE_DIR"
  exit 1
fi

# The first entry listed by ls is used for the checks that follow.

FIRST_FILE=$(ls "$IN_DIR" | head -1)

# Make sure that we're not dealing with a directory.

if [ -d "$IN_DIR/$FIRST_FILE" ]; then
  echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2
  rm -rf "$TRACE_DIR"
  exit 1
fi
# Check for the more efficient way to copy files: default to cp, and switch
# to hard links if creating one inside the trace directory works.

CP_TOOL=cp

if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
  CP_TOOL=ln
fi
# Make sure that we can actually get anything out of afl-showmap before we
# waste too much time. The first input file is run once, either through the
# input file (-A) or via stdin, with AFL_CMIN_ALLOW_ANY set for that single
# invocation.

echo "[*] Testing the target binary..."

if [ -n "$STDIN_FILE" ]; then

  cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null

else

  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"

fi

# Every non-empty line of the trace counts as one tuple.

FIRST_COUNT=$(( $(grep -c . "$TRACE_DIR/.run_test") ))

if [ "$FIRST_COUNT" -le "0" ]; then

  echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2

  if [ -z "$AFL_KEEP_TRACES" ]; then
    rm -rf "$TRACE_DIR"
  fi

  exit 1

fi

echo "[+] OK, $FIRST_COUNT tuples recorded."
# Let's roll!

#############################
# STEP 1: COLLECTING TRACES #
#############################

echo "[*] Obtaining traces for input files in '$IN_DIR'..."

# Run afl-showmap once per input file, writing each trace to a file of the
# same name inside the trace directory. The work happens in a subshell, so
# the CUR counter used here does not leak into the rest of the script.

(

  CUR=0

  while read -r fn; do

    CUR=$((CUR+1))
    printf "\\r Processing file $CUR/$IN_COUNT... "

    if [ "$STDIN_FILE" = "" ]; then

      # Target reads stdin: feed the test case directly.
      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"

    else

      # Target reads a file: refresh it with the current test case first.
      cp "$IN_DIR/$fn" "$STDIN_FILE"
      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null

    fi

  done < <(ls "$IN_DIR")

)

echo
##########################
# STEP 2: SORTING TUPLES #
##########################

# With this out of the way, we sort all tuples by popularity across all
# datasets. The reasoning here is that we won't be able to avoid the files
# that trigger unique tuples anyway, so we will want to start with them and
# see what's left.

echo "[*] Sorting trace sets (this may take a while)..."

# Concatenate the trace of every input file and count how often each tuple
# occurs; the result is sorted by ascending count. The trace paths are built
# by the shell rather than by a sed expression, so characters such as '#',
# '&' or '\' in the output directory name cannot corrupt them.

ls "$IN_DIR" | while IFS= read -r fn; do
  cat -- "$TRACE_DIR/$fn"
done | sort | uniq -c | sort -n >"$TRACE_DIR/.all_uniq"

TUPLE_COUNT=$(( $(grep -c . "$TRACE_DIR/.all_uniq") ))

echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
#####################################
# STEP 3: SELECTING CANDIDATE FILES #
#####################################

# The next step is to find the best candidate for each tuple. The "best"
# part is understood simply as the smallest input that includes a particular
# tuple in its trace. Empirical evidence suggests that this produces smaller
# datasets than more involved algorithms that could be still pulled off in
# a shell script.

echo "[*] Finding best candidates for each tuple..."

CUR=0

# Walk the inputs from smallest to largest (ls -rS) and append one
# "<tuple> <file name>" line per trace entry to the candidate list.
#
# The file name reaches awk through the environment and the trace through
# stdin, so the name is never interpreted: characters that are special in a
# sed replacement ('&', '#', '\') are copied verbatim, and a path containing
# '=' cannot be mistaken for an awk variable assignment.

while read -r fn; do

  CUR=$((CUR+1))
  printf "\\r Processing file $CUR/$IN_COUNT... "

  CMIN_FN="$fn" awk '{ print $0 " " ENVIRON["CMIN_FN"] }' \
    <"$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"

done < <(ls -rS "$IN_DIR")

echo
##############################
# STEP 4: LOADING CANDIDATES #
##############################

# At this point, we have a file of tuple-file pairs, sorted by file size
# in ascending order (as a consequence of ls -rS). By doing sort keyed
# only by tuple (-k 1,1) and configured to output only the first line for
# every key (-s -u), we end up with the smallest file for each tuple.

echo "[*] Sorting candidate list (be patient)..."

sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" >"$TRACE_DIR/.candidate_best"

if [ ! -s "$TRACE_DIR/.candidate_best" ]; then
  echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2
  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
  exit 1
fi

# Populate BEST_FILE[tuple]="fname" straight from the sorted list. Each line
# is split at its first space: the part before it is the tuple, everything
# after it is the file name (which may itself contain spaces).
#
# The names are assigned as data and never parsed as shell code, so quotes,
# '$' or backticks in a file name cannot break the script or run commands -
# which they could if the list were turned into a script and sourced.

while IFS= read -r pair; do
  BEST_FILE[${pair%% *}]="${pair#* }"
done <"$TRACE_DIR/.candidate_best"
##########################
# STEP 5: WRITING OUTPUT #
##########################

# The final trick is to grab the top pick for each tuple, unless said tuple is
# already set due to the inclusion of an earlier candidate; and then put all
# tuples associated with the newly-added file to the "already have" list. The
# loop works from least popular tuples and toward the most common ones.

echo "[*] Processing candidates and writing output files..."

CUR=0

touch "$TRACE_DIR/.already_have"

# Each line of .all_uniq is "<count> <tuple>", as produced by uniq -c.

while read -r cnt tuple; do

  CUR=$((CUR+1))
  printf "\\r Processing tuple $CUR/$TUPLE_COUNT... "

  # If we already have this tuple, skip it.

  grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue

  # The subscript is evaluated arithmetically, so the bare name "tuple"
  # resolves to the value of $tuple.

  FN=${BEST_FILE[tuple]}

  $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"

  # Every fifth iteration, fold the list through sort -u to drop duplicates;
  # otherwise just append the new file's tuples.

  if ((CUR % 5 == 0)); then
    sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
    mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
  else
    cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
  fi

done <"$TRACE_DIR/.all_uniq"

echo

# Normalize the count through $(( )): some wc implementations pad their
# output with spaces, which would defeat the string comparison below and
# garble the summary line. (IN_COUNT is computed the same way.)

OUT_COUNT=$(( $(ls -- "$OUT_DIR" | wc -l) ))

if [ "$OUT_COUNT" = "1" ]; then
  echo "[!] WARNING: All test cases had the same traces, check syntax!"
fi

echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
echo

test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"

exit 0

File diff suppressed because it is too large Load Diff

@ -0,0 +1,346 @@
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - wrapper for GCC and clang
----------------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
This program is a drop-in replacement for GCC or clang. The most common way
of using it is to pass the path to afl-gcc or afl-clang via CC when invoking
./configure.
(Of course, use CXX and point it to afl-g++ / afl-clang++ for C++ code.)
The wrapper needs to know the path to afl-as (renamed to 'as'). The default
is /usr/local/lib/afl/. A convenient way to specify alternative directories
would be to set AFL_PATH.
If AFL_HARDEN is set, the wrapper will compile the target app with various
hardening options that may help detect memory management issues more
reliably. You can also specify AFL_USE_ASAN to enable ASAN.
If you want to call a non-default compiler as a next step of the chain,
specify its location via AFL_CC or AFL_CXX.
*/
#define AFL_MAIN
#include "config.h"
#include "types.h"
#include "debug.h"
#include "alloc-inl.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
static u8* as_path; /* Path to the AFL 'as' wrapper */
static u8** cc_params; /* Parameters passed to the real CC */
static u32 cc_par_cnt = 1; /* Param count, including argv0 */
static u8 be_quiet, /* Quiet mode */
clang_mode; /* Invoked as afl-clang*? */
/* Locate the directory holding AFL's stand-in assembler and record it in
   as_path. Candidates are probed in this order:

     1) $AFL_PATH, which must contain an executable "as";
     2) the directory part of argv0, which must contain "afl-as";
     3) the compiled-in AFL_PATH, which must contain "as".

   If none of them qualifies, bail out via FATAL(). */
static void find_as(u8* argv0) {

  u8* env_dir = getenv("AFL_PATH");
  u8* last_sep;
  u8* probe;
  int usable;

  if (env_dir) {

    probe  = alloc_printf("%s/as", env_dir);
    usable = !access(probe, X_OK);
    ck_free(probe);

    if (usable) {
      as_path = env_dir;
      return;
    }

  }

  last_sep = strrchr(argv0, '/');

  if (last_sep) {

    u8* own_dir;

    /* Temporarily cut argv0 at the last slash to copy just the directory. */
    *last_sep = 0;
    own_dir   = ck_strdup(argv0);
    *last_sep = '/';

    probe  = alloc_printf("%s/afl-as", own_dir);
    usable = !access(probe, X_OK);
    ck_free(probe);

    if (usable) {
      as_path = own_dir;
      return;
    }

    ck_free(own_dir);

  }

  if (!access(AFL_PATH "/as", X_OK)) {
    as_path = AFL_PATH;
    return;
  }

  FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH");

}
/* Copy argv to cc_params, making the necessary edits.

   cc_params[0] is chosen from the name this wrapper was invoked under
   (afl-clang*, afl-g++, afl-gcj, or anything else), with AFL_CC / AFL_CXX /
   AFL_GCJ acting as overrides. The caller's remaining arguments are copied
   with a few flags filtered out, then "-B <as_path>" and a set of
   environment-driven flags (hardening, sanitizers, optimization, builtin
   suppression) are appended. The vector is NULL-terminated on return.

   argc / argv are the values received by main(). Uses as_path, so find_as()
   has to run first. */
static void edit_params(u32 argc, char** argv) {
u8 fortify_set = 0, asan_set = 0;
u8 *name;
#if defined(__FreeBSD__) && defined(__x86_64__)
u8 m32_set = 0;
#endif
/* Room for every incoming argument plus 128 slots for the flags added
   further down. */
cc_params = ck_alloc((argc + 128) * sizeof(u8*));
/* Reduce argv[0] to its final path component. */
name = strrchr(argv[0], '/');
if (!name) name = argv[0]; else name++;
if (!strncmp(name, "afl-clang", 9)) {
clang_mode = 1;
setenv(CLANG_ENV_VAR, "1", 1);
if (!strcmp(name, "afl-clang++")) {
u8* alt_cxx = getenv("AFL_CXX");
cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++";
} else {
u8* alt_cc = getenv("AFL_CC");
cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
}
} else {
/* With GCJ and Eclipse installed, you can actually compile Java! The
instrumentation will work (amazingly). Alas, unhandled exceptions do
not call abort(), so afl-fuzz would need to be modified to equate
non-zero exit codes with crash conditions when working with Java
binaries. Meh. */
#ifdef __APPLE__
/* No built-in default here: the compiler must come from the environment. */
if (!strcmp(name, "afl-g++")) cc_params[0] = getenv("AFL_CXX");
else if (!strcmp(name, "afl-gcj")) cc_params[0] = getenv("AFL_GCJ");
else cc_params[0] = getenv("AFL_CC");
if (!cc_params[0]) {
SAYF("\n" cLRD "[-] " cRST
"On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n"
" 'afl-clang' utility instead of 'afl-gcc'. If you really have GCC installed,\n"
" set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n");
FATAL("AFL_CC or AFL_CXX required on MacOS X");
}
#else
if (!strcmp(name, "afl-g++")) {
u8* alt_cxx = getenv("AFL_CXX");
cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++";
} else if (!strcmp(name, "afl-gcj")) {
u8* alt_cc = getenv("AFL_GCJ");
cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj";
} else {
u8* alt_cc = getenv("AFL_CC");
cc_params[0] = alt_cc ? alt_cc : (u8*)"gcc";
}
#endif /* __APPLE__ */
}
/* Walk the caller's arguments (argv[1] onwards), filtering as we copy. */
while (--argc) {
u8* cur = *(++argv);
/* Any caller-supplied -B is dropped, since our own is appended below. For
   the split form ("-B dir") the following argument is skipped as well. */
if (!strncmp(cur, "-B", 2)) {
if (!be_quiet) WARNF("-B is already set, overriding");
if (!cur[2] && argc > 1) { argc--; argv++; }
continue;
}
/* These two flags are never forwarded to the compiler. */
if (!strcmp(cur, "-integrated-as")) continue;
if (!strcmp(cur, "-pipe")) continue;
#if defined(__FreeBSD__) && defined(__x86_64__)
if (!strcmp(cur, "-m32")) m32_set = 1;
#endif
/* Note: asan_set is raised for an explicit MSAN flag too. */
if (!strcmp(cur, "-fsanitize=address") ||
!strcmp(cur, "-fsanitize=memory")) asan_set = 1;
/* Substring match, so both -D and -U forms of FORTIFY_SOURCE count. */
if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;
cc_params[cc_par_cnt++] = cur;
}
/* Point the compiler at the directory found by find_as(). */
cc_params[cc_par_cnt++] = "-B";
cc_params[cc_par_cnt++] = as_path;
if (clang_mode)
cc_params[cc_par_cnt++] = "-no-integrated-as";
if (getenv("AFL_HARDEN")) {
cc_params[cc_par_cnt++] = "-fstack-protector-all";
if (!fortify_set)
cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2";
}
/* Sanitizer selection: a sanitizer flag already on the command line wins;
   otherwise AFL_USE_ASAN is checked before AFL_USE_MSAN. */
if (asan_set) {
/* Pass this on to afl-as to adjust map density. */
setenv("AFL_USE_ASAN", "1", 1);
} else if (getenv("AFL_USE_ASAN")) {
if (getenv("AFL_USE_MSAN"))
FATAL("ASAN and MSAN are mutually exclusive");
if (getenv("AFL_HARDEN"))
FATAL("ASAN and AFL_HARDEN are mutually exclusive");
cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
cc_params[cc_par_cnt++] = "-fsanitize=address";
} else if (getenv("AFL_USE_MSAN")) {
/* NOTE(review): this inner AFL_USE_ASAN test cannot fire, because the
   previous branch is taken whenever AFL_USE_ASAN is set. */
if (getenv("AFL_USE_ASAN"))
FATAL("ASAN and MSAN are mutually exclusive");
if (getenv("AFL_HARDEN"))
FATAL("MSAN and AFL_HARDEN are mutually exclusive");
cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
cc_params[cc_par_cnt++] = "-fsanitize=memory";
}
if (!getenv("AFL_DONT_OPTIMIZE")) {
#if defined(__FreeBSD__) && defined(__x86_64__)
/* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself
works OK. This has nothing to do with us, but let's avoid triggering
that bug. */
if (!clang_mode || !m32_set)
cc_params[cc_par_cnt++] = "-g";
#else
cc_params[cc_par_cnt++] = "-g";
#endif
cc_params[cc_par_cnt++] = "-O3";
cc_params[cc_par_cnt++] = "-funroll-loops";
/* Two indicators that you're building for fuzzing; one of them is
AFL-specific, the other is shared with libfuzzer. */
cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";
}
/* Optionally stop the compiler from using its builtin versions of the
   string / memory comparison routines listed here. */
if (getenv("AFL_NO_BUILTIN")) {
cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp";
cc_params[cc_par_cnt++] = "-fno-builtin-memcmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strstr";
cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr";
}
/* execvp() in main() needs a NULL-terminated vector. */
cc_params[cc_par_cnt] = NULL;
}
/* Main entry point */
int main(int argc, char** argv) {
if (isatty(2) && !getenv("AFL_QUIET")) {
SAYF(cCYA "afl-cc " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
} else be_quiet = 1;
if (argc < 2) {
SAYF("\n"
"This is a helper application for afl-fuzz. It serves as a drop-in replacement\n"
"for gcc or clang, letting you recompile third-party code with the required\n"
"runtime instrumentation. A common use pattern would be one of the following:\n\n"
" CC=%s/afl-gcc ./configure\n"
" CXX=%s/afl-g++ ./configure\n\n"
"You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n"
"Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n",
BIN_PATH, BIN_PATH);
exit(1);
}
find_as(argv[0]);
edit_params(argc, argv);
execvp(cc_params[0], (char**)cc_params);
FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);
return 0;
}

@ -0,0 +1,260 @@
/*
Copyright 2015 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - free CPU gizmo
-----------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
This tool provides a fairly accurate measurement of CPU preemption rate.
It is meant to complement the quick-and-dirty load average widget shown
in the afl-fuzz UI. See docs/parallel_fuzzing.txt for more info.
For some work loads, the tool may actually suggest running more instances
than you have CPU cores. This can happen if the tested program is spending
a portion of its run time waiting for I/O, rather than being 100%
CPU-bound.
The idea for the getrusage()-based approach comes from Jakub Wilk.
*/
#define AFL_MAIN
#include "android-ashmem.h"
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include "types.h"
#include "debug.h"
#ifdef __linux__
# define HAVE_AFFINITY 1
#endif /* __linux__ */
/* Get unix time in microseconds. */
static u64 get_cur_time_us(void) {
struct timeval tv;
struct timezone tz;
gettimeofday(&tv, &tz);
return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
}
/* CPU time consumed by this process so far (user plus system, as reported by
   getrusage(RUSAGE_SELF)), expressed in microseconds. */
static u64 get_cpu_usage_us(void) {

  struct rusage usage;
  u64 user_us, sys_us;

  getrusage(RUSAGE_SELF, &usage);

  user_us = (usage.ru_utime.tv_sec * 1000000ULL) + usage.ru_utime.tv_usec;
  sys_us  = (usage.ru_stime.tv_sec * 1000000ULL) + usage.ru_stime.tv_usec;

  return user_us + sys_us;

}
/* Measure preemption rate.

   Spins in a busy loop (CTEST_BUSY_CYCLES iterations followed by a
   sched_yield()) until at least target_ms milliseconds of wall-clock time
   have elapsed, then compares elapsed wall time with the CPU time actually
   charged to this process.

   Returns wall time as a percentage of CPU time: 100 means the process ran
   the whole time, larger values mean it spent part of the window preempted. */
static u32 measure_preemption(u32 target_ms) {

  static volatile u32 v1, v2;

  u64 st_t, en_t, st_c, en_c, real_delta, slice_delta;

  /* Widen before multiplying so that large targets cannot wrap in 32 bits. */
  u64 target_us = (u64)target_ms * 1000;

  st_t = get_cur_time_us();
  st_c = get_cpu_usage_us();

  do {

    v1 = CTEST_BUSY_CYCLES;

    /* volatile counters keep the compiler from discarding the busy work. */
    while (v1--) v2++;
    sched_yield();

    en_t = get_cur_time_us();

  } while (en_t - st_t < target_us);

  /* Let's see what percentage of this time we actually had a chance to
     run, and how much time was spent in the penalty box. */

  en_c = get_cpu_usage_us();

  real_delta  = (en_t - st_t) / 1000;
  slice_delta = (en_c - st_c) / 1000;

  /* If less than one millisecond of CPU time was charged, the division below
     would trap. Treat it as 1 ms, which still yields a very large ratio. */
  if (!slice_delta) slice_delta = 1;

  return real_delta * 100 / slice_delta;

}
/* Do the benchmark thing.

   With HAVE_AFFINITY: fork one child per online CPU, pin each child to its
   core, measure the preemption rate there and report the verdict through
   the child's exit status (0 = available, 1 = caution, 2 = overbooked).
   The parent tallies those and returns 0 / 1 / 2 for PASS / CAUTION / FAIL.

   Without HAVE_AFFINITY: take one system-wide measurement and return the
   same 0 / 1 / 2 scale. */
int main(int argc, char** argv) {

#ifdef HAVE_AFFINITY

  long online = sysconf(_SC_NPROCESSORS_ONLN);
  u32  cpu_cnt, idle_cpus = 0, maybe_cpus = 0, i;

  SAYF(cCYA "afl-gotcpu " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  /* sysconf() returns -1 on failure; storing that in a u32 would make the
     loops below run ~4 billion times. */
  if (online < 1) FATAL("Unable to determine the number of online CPUs");

  cpu_cnt = online;

  ACTF("Measuring per-core preemption rate (this will take %0.02f sec)...",
       ((double)CTEST_CORE_TRG_MS) / 1000);

  for (i = 0; i < cpu_cnt; i++) {

    s32 fr = fork();

    if (fr < 0) PFATAL("fork failed");

    if (!fr) {

      /* Child: bind to core #i, measure, and encode the verdict in the exit
         status. */

      cpu_set_t c;
      u32 util_perc;

      CPU_ZERO(&c);
      CPU_SET(i, &c);

      if (sched_setaffinity(0, sizeof(c), &c))
        PFATAL("sched_setaffinity failed for cpu %u", i);

      util_perc = measure_preemption(CTEST_CORE_TRG_MS);

      if (util_perc < 110) {

        SAYF(" Core #%u: " cLGN "AVAILABLE " cRST "(%u%%)\n", i, util_perc);
        exit(0);

      } else if (util_perc < 250) {

        SAYF(" Core #%u: " cYEL "CAUTION " cRST "(%u%%)\n", i, util_perc);
        exit(1);

      }

      SAYF(" Core #%u: " cLRD "OVERBOOKED " cRST "(%u%%)\n" cRST, i,
           util_perc);
      exit(2);

    }

  }

  for (i = 0; i < cpu_cnt; i++) {

    int ret;

    if (waitpid(-1, &ret, 0) < 0) PFATAL("waitpid failed");

    /* The exit status is only meaningful for a child that exited normally;
       one killed by a signal must not be counted as an idle core. */
    if (!WIFEXITED(ret)) continue;

    if (WEXITSTATUS(ret) == 0) idle_cpus++;
    if (WEXITSTATUS(ret) <= 1) maybe_cpus++;

  }

  SAYF(cGRA "\n>>> ");

  if (idle_cpus) {

    if (maybe_cpus == idle_cpus) {

      SAYF(cLGN "PASS: " cRST "You can run more processes on %u core%s.",
           idle_cpus, idle_cpus > 1 ? "s" : "");

    } else {

      SAYF(cLGN "PASS: " cRST "You can run more processes on %u to %u core%s.",
           idle_cpus, maybe_cpus, maybe_cpus > 1 ? "s" : "");

    }

    SAYF(cGRA " <<<" cRST "\n\n");
    return 0;

  }

  if (maybe_cpus) {

    SAYF(cYEL "CAUTION: " cRST "You may still have %u core%s available.",
         maybe_cpus, maybe_cpus > 1 ? "s" : "");
    SAYF(cGRA " <<<" cRST "\n\n");
    return 1;

  }

  SAYF(cLRD "FAIL: " cRST "All cores are overbooked.");
  SAYF(cGRA " <<<" cRST "\n\n");
  return 2;

#else

  u32 util_perc;

  SAYF(cCYA "afl-gotcpu " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  /* Run a busy loop for CTEST_TARGET_MS. */

  ACTF("Measuring gross preemption rate (this will take %0.02f sec)...",
       ((double)CTEST_TARGET_MS) / 1000);

  util_perc = measure_preemption(CTEST_TARGET_MS);

  /* Deliver the final verdict. */

  SAYF(cGRA "\n>>> ");

  if (util_perc < 105) {

    SAYF(cLGN "PASS: " cRST "You can probably run additional processes.");

  } else if (util_perc < 130) {

    SAYF(cYEL "CAUTION: " cRST "Your CPU may be somewhat overbooked (%u%%).",
         util_perc);

  } else {

    SAYF(cLRD "FAIL: " cRST "Your CPU is overbooked (%u%%).", util_perc);

  }

  SAYF(cGRA " <<<" cRST "\n\n");

  /* Same boundaries as the messages above: exactly 105 is CAUTION (1) and
     exactly 130 is FAIL (2). */
  return (util_perc >= 105) + (util_perc >= 130);

#endif /* ^HAVE_AFFINITY */

}

@ -0,0 +1,170 @@
#!/bin/sh
#
# american fuzzy lop - Advanced Persistent Graphing
# -------------------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
# Based on a design & prototype by Michael Rash.
#
# Copyright 2014, 2015 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Banner, followed by an argument-count check: exactly two positional
# parameters (state directory, output directory) are required. Anything else
# prints the usage text to stderr and exits with status 1.
echo "progress plotting utility for afl-fuzz by <lcamtuf@google.com>"
echo

if [ "$#" -ne 2 ]; then

  cat >&2 <<_EOF_
This program generates gnuplot images from afl-fuzz output data. Usage:
$0 afl_state_dir graph_output_dir
The afl_state_dir parameter should point to an existing state directory for any
active or stopped instance of afl-fuzz; while graph_output_dir should point to
an empty directory where this tool can write the resulting plots to.
The program will put index.html and three PNG images in the output directory;
you should be able to view it with any web browser of your choice.
_EOF_

  exit 1

fi
# Succeeds when the path in $1 starts with /tmp/ or /var/tmp/.
# Pure pattern match: no subprocesses, and the value is never passed through
# echo, so leading dashes or backslashes in the path cannot affect the result.
is_shared_tmp_path() {
  case "$1" in
    /tmp/* | /var/tmp/*) return 0 ;;
  esac
  return 1
}

# Refuse to read from or write to world-writable temp locations unless the
# user explicitly opts in by setting AFL_ALLOW_TMP.
if [ "$AFL_ALLOW_TMP" = "" ]; then

  if is_shared_tmp_path "$1" || is_shared_tmp_path "$2"; then

    echo "[-] Error: this script shouldn't be used with shared /tmp directories." 1>&2
    exit 1

  fi

fi
# The state directory is only usable if afl-fuzz left a plot_data log in it.
if [ ! -f "$1/plot_data" ]; then

  echo "[-] Error: input directory is not valid (missing 'plot_data')." 1>&2
  exit 1

fi

# The banner is taken from the "afl_banner : <text>" line of fuzzer_stats:
# everything after the first colon, minus the one leading space. A missing or
# unreadable stats file is not an error; the banner just falls back to
# "(none)".
BANNER=""

if [ -r "$1/fuzzer_stats" ]; then
  BANNER="$(grep '^afl_banner ' "$1/fuzzer_stats" | cut -d: -f2- | cut -b2-)"
fi

test "$BANNER" = "" && BANNER="(none)"

# command -v is the POSIX way to look a program up in $PATH; 'which' is an
# external tool that is not guaranteed to exist or to behave consistently.
GNUPLOT="$(command -v gnuplot 2>/dev/null)"

if [ "$GNUPLOT" = "" ]; then

  echo "[-] Error: can't find 'gnuplot' in your \$PATH." 1>&2
  exit 1

fi
# Create the output directory if needed. mkdir's own status is ignored (the
# directory may already exist); what matters is that it is there afterwards.
mkdir "$2" 2>/dev/null

[ -d "$2" ] || {
  echo "[-] Error: unable to create the output directory - pick another location." 1>&2
  exit 1
}

# Clear plots left over from a previous run, and keep the old index page
# around under a .orig name.
for STALE_PLOT in high_freq.png low_freq.png exec_speed.png; do
  rm -f "$2/$STALE_PLOT"
done

mv -f "$2/index.html" "$2/index.html.orig" 2>/dev/null
# Build a gnuplot script on the fly and pipe it into gnuplot. Three PNG files
# are produced in the output directory ($2), all plotted against column 1 of
# $1/plot_data on a time axis:
#
#   high_freq.png  - columns 4, 3, 5, 6 and 2 (paths / pending / cycles)
#   low_freq.png   - columns 8, 9 and 10 (crashes / hangs / levels)
#   exec_speed.png - column 11 (execs/sec), once filled and once smoothed
#
# The here-document is unquoted, so the shell expands $1 and $2 inside it and
# turns every trailing "\\" into the single backslash that gnuplot treats as a
# line continuation.
echo "[*] Generating plots..."
(
cat <<_EOF_
set terminal png truecolor enhanced size 1000,300 butt
set output '$2/high_freq.png'
set xdata time
set timefmt '%s'
set format x "%b %d\n%H:%M"
set tics font 'small'
unset mxtics
unset mytics
set grid xtics linetype 0 linecolor rgb '#e0e0e0'
set grid ytics linetype 0 linecolor rgb '#e0e0e0'
set border linecolor rgb '#50c0f0'
set tics textcolor rgb '#000000'
set key outside
set autoscale xfixmin
set autoscale xfixmax
plot '$1/plot_data' using 1:4 with filledcurve x1 title 'total paths' linecolor rgb '#000000' fillstyle transparent solid 0.2 noborder, \\
'' using 1:3 with filledcurve x1 title 'current path' linecolor rgb '#f0f0f0' fillstyle transparent solid 0.5 noborder, \\
'' using 1:5 with lines title 'pending paths' linecolor rgb '#0090ff' linewidth 3, \\
'' using 1:6 with lines title 'pending favs' linecolor rgb '#c00080' linewidth 3, \\
'' using 1:2 with lines title 'cycles done' linecolor rgb '#c000f0' linewidth 3
set terminal png truecolor enhanced size 1000,200 butt
set output '$2/low_freq.png'
plot '$1/plot_data' using 1:8 with filledcurve x1 title '' linecolor rgb '#c00080' fillstyle transparent solid 0.2 noborder, \\
'' using 1:8 with lines title ' uniq crashes' linecolor rgb '#c00080' linewidth 3, \\
'' using 1:9 with lines title 'uniq hangs' linecolor rgb '#c000f0' linewidth 3, \\
'' using 1:10 with lines title 'levels' linecolor rgb '#0090ff' linewidth 3
set terminal png truecolor enhanced size 1000,200 butt
set output '$2/exec_speed.png'
plot '$1/plot_data' using 1:11 with filledcurve x1 title '' linecolor rgb '#0090ff' fillstyle transparent solid 0.2 noborder, \\
'$1/plot_data' using 1:11 with lines title ' execs/sec' linecolor rgb '#0090ff' linewidth 3 smooth bezier;
_EOF_
) | gnuplot
# exec_speed.png is the last plot in the script; if it is missing or empty,
# the run is treated as failed.
# NOTE(review): $1 and $2 are placed inside single-quoted gnuplot strings, so
# a path containing a single quote would presumably break the script - confirm.
if [ ! -s "$2/exec_speed.png" ]; then
echo "[-] Error: something went wrong! Perhaps you have an ancient version of gnuplot?" 1>&2
exit 1
fi
# Write a small HTML page that shows the banner, the state directory, the
# generation time and the three plots.
echo "[*] Generating index.html..."

cat <<_EOF_ >"$2/index.html"
<table style="font-family: 'Trebuchet MS', 'Tahoma', 'Arial', 'Helvetica'">
<tr><td style="width: 18ex"><b>Banner:</b></td><td>$BANNER</td></tr>
<tr><td><b>Directory:</b></td><td>$1</td></tr>
<tr><td><b>Generated on:</b></td><td>`date`</td></tr>
</table>
<p>
<img src="high_freq.png" width=1000 height=300><p>
<img src="low_freq.png" width=1000 height=200><p>
<img src="exec_speed.png" width=1000 height=200>
_EOF_

# The output may sit directly under a web server's document root, so open up
# read access to the directory and everything generated in it. The plots are
# not considered sensitive enough to warrant tighter permissions.
chmod 755 "$2"

for GENERATED in high_freq.png low_freq.png exec_speed.png index.html; do
  chmod 644 "$2/$GENERATED"
done

echo "[+] All done - enjoy your charts!"

exit 0

@ -0,0 +1,795 @@
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - map display utility
----------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
A very simple tool that runs the targeted binary and displays
the contents of the trace bitmap in a human-readable form. Useful in
scripts to eliminate redundant inputs and perform other checks.
Exit code is 2 if the target program crashes; 1 if it times out or
there is a problem executing it; or 0 if execution is successful.
*/
#define AFL_MAIN
#include "android-ashmem.h"
#include "config.h"
#include "types.h"
#include "debug.h"
#include "alloc-inl.h"
#include "hash.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <signal.h>
#include <dirent.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/resource.h>
static s32 child_pid; /* PID of the tested program */
static u8* trace_bits; /* SHM with instrumentation bitmap */
static u8 *out_file, /* Trace output file */
*doc_path, /* Path to docs */
*target_path, /* Path to target binary */
*at_file; /* Substitution string for @@ */
static u32 exec_tmout; /* Exec timeout (ms) */
static u64 mem_limit = MEM_LIMIT; /* Memory limit (MB) */
static s32 shm_id; /* ID of the SHM region */
static u8 quiet_mode, /* Hide non-essential messages? */
edges_only, /* Ignore hit counts? */
cmin_mode, /* Generate output in afl-cmin mode? */
binary_mode, /* Write output as a binary map */
keep_cores; /* Allow coredumps? */
static volatile u8
stop_soon, /* Ctrl-C pressed? */
child_timed_out, /* Child timed out? */
child_crashed; /* Child crashed? */
/* Classify tuple counts. Instead of mapping to individual bits, as in
afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */
static const u8 count_class_human[256] = {
[0] = 0,
[1] = 1,
[2] = 2,
[3] = 3,
[4 ... 7] = 4,
[8 ... 15] = 5,
[16 ... 31] = 6,
[32 ... 127] = 7,
[128 ... 255] = 8
};
static const u8 count_class_binary[256] = {
[0] = 0,
[1] = 1,
[2] = 2,
[3] = 4,
[4 ... 7] = 8,
[8 ... 15] = 16,
[16 ... 31] = 32,
[32 ... 127] = 64,
[128 ... 255] = 128
};
/* Rewrite the MAP_SIZE bytes at mem in place. In edges-only mode every
   non-zero byte becomes 1; otherwise each byte is replaced by its entry in
   the 256-element lookup table passed as map. */
static void classify_counts(u8* mem, const u8* map) {

  u8* stop = mem + MAP_SIZE;

  if (edges_only) {

    for (; mem != stop; mem++)
      if (*mem) *mem = 1;

    return;

  }

  for (; mem != stop; mem++)
    *mem = map[*mem];

}
/* atexit() hook: mark the SHM segment identified by shm_id for removal.
   The result of shmctl() is deliberately not examined. */
static void remove_shm(void) {

  (void)shmctl(shm_id, IPC_RMID, NULL);

}
/* Allocate a private MAP_SIZE-byte SHM segment, arrange for it to be removed
   at exit, publish its id through the SHM_ENV_VAR environment variable, and
   attach it as trace_bits. Any failure is fatal. */
static void setup_shm(void) {

  u8* id_text;

  if ((shm_id = shmget(IPC_PRIVATE, MAP_SIZE,
                       IPC_CREAT | IPC_EXCL | 0600)) < 0)
    PFATAL("shmget() failed");

  atexit(remove_shm);

  /* The id travels to the child as a decimal string in the environment. */
  id_text = alloc_printf("%d", shm_id);
  setenv(SHM_ENV_VAR, id_text, 1);
  ck_free(id_text);

  trace_bits = shmat(shm_id, NULL, 0);

  if ((void *)-1 == trace_bits) PFATAL("shmat() failed");

}
/* Write results.

   Dumps the (already classified) trace bitmap to out_file and returns the
   number of non-zero map entries that were counted.

   Output target: paths starting with "/dev/" are opened as-is, "-" means a
   duplicate of stdout, anything else is unlinked and re-created exclusively
   with mode 0600.

   Output format: in binary mode, the raw MAP_SIZE bytes; otherwise one text
   line per non-zero entry - "index:value" normally, or the afl-cmin syntax
   when cmin_mode is set. */
static u32 write_results(void) {
s32 fd;
u32 i, ret = 0;
/* cco: only crashing runs should produce output; caa: any run may. Both
   only matter in cmin mode. */
u8 cco = !!getenv("AFL_CMIN_CRASHES_ONLY"),
caa = !!getenv("AFL_CMIN_ALLOW_ANY");
if (!strncmp(out_file, "/dev/", 5)) {
fd = open(out_file, O_WRONLY, 0600);
if (fd < 0) PFATAL("Unable to open '%s'", out_file);
} else if (!strcmp(out_file, "-")) {
fd = dup(1);
if (fd < 0) PFATAL("Unable to open stdout");
} else {
unlink(out_file); /* Ignore errors */
fd = open(out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);
if (fd < 0) PFATAL("Unable to create '%s'", out_file);
}
if (binary_mode) {
for (i = 0; i < MAP_SIZE; i++)
if (trace_bits[i]) ret++;
ck_write(fd, trace_bits, MAP_SIZE, out_file);
close(fd);
} else {
FILE* f = fdopen(fd, "w");
if (!f) PFATAL("fdopen() failed");
for (i = 0; i < MAP_SIZE; i++) {
if (!trace_bits[i]) continue;
/* Counted before the cmin checks below, so the return value is non-zero
   even when those checks suppress all output. */
ret++;
if (cmin_mode) {
/* Emit nothing for runs that timed out, or whose crash status does not
   match what AFL_CMIN_CRASHES_ONLY asks for (unless any run is allowed). */
if (child_timed_out) break;
if (!caa && child_crashed != cco) break;
/* cmin syntax: the value immediately followed by the index, with no
   separator between the two numbers. */
fprintf(f, "%u%u\n", trace_bits[i], i);
} else fprintf(f, "%06u:%u\n", i, trace_bits[i]);
}
fclose(f);
}
return ret;
}
/* SIGALRM handler: note that the run timed out and, if a child is currently
   being tracked, kill it. */
static void handle_timeout(int sig) {

  child_timed_out = 1;

  if (child_pid <= 0) return;

  kill(child_pid, SIGKILL);

}
/* Execute target application.

   Forks; the child applies resource limits, optionally silences its output
   and exec()s target_path with argv. The parent arms the exec timeout (if
   any), waits for the child, then classifies the trace bitmap and records
   the outcome in child_timed_out / child_crashed. Failure to exec the target
   is fatal. */
static void run_target(char** argv) {
/* static, hence zero-initialised: when exec_tmout is 0 the timer value
   passed to setitimer() below stays all-zero. */
static struct itimerval it;
int status = 0;
if (!quiet_mode)
SAYF("-- Program output begins --\n" cRST);
MEM_BARRIER();
child_pid = fork();
if (child_pid < 0) PFATAL("fork() failed");
if (!child_pid) {
/* Child process. */
struct rlimit r;
if (quiet_mode) {
/* Send the target's stdout and stderr to /dev/null. */
s32 fd = open("/dev/null", O_RDWR);
if (fd < 0 || dup2(fd, 1) < 0 || dup2(fd, 2) < 0) {
*(u32*)trace_bits = EXEC_FAIL_SIG;
PFATAL("Descriptor initialization failed");
}
close(fd);
}
if (mem_limit) {
/* mem_limit is in megabytes; shift by 20 to get bytes. */
r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;
#ifdef RLIMIT_AS
setrlimit(RLIMIT_AS, &r); /* Ignore errors */
#else
setrlimit(RLIMIT_DATA, &r); /* Ignore errors */
#endif /* ^RLIMIT_AS */
}
/* Core dumps are disabled unless keep_cores (-c) is set. */
if (!keep_cores) r.rlim_max = r.rlim_cur = 0;
else r.rlim_max = r.rlim_cur = RLIM_INFINITY;
setrlimit(RLIMIT_CORE, &r); /* Ignore errors */
/* Request LD_BIND_NOW unless the user asked for lazy binding; the final 0
   means an existing LD_BIND_NOW value is left untouched. */
if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0);
setsid();
execv(target_path, argv);
/* Only reached if execv() failed: flag that to the parent through the
   shared map, which the parent checks after waitpid(). */
*(u32*)trace_bits = EXEC_FAIL_SIG;
exit(0);
}
/* Configure timeout, wait for child, cancel timeout. */
if (exec_tmout) {
child_timed_out = 0;
it.it_value.tv_sec = (exec_tmout / 1000);
it.it_value.tv_usec = (exec_tmout % 1000) * 1000;
}
setitimer(ITIMER_REAL, &it, NULL);
if (waitpid(child_pid, &status, 0) <= 0) FATAL("waitpid() failed");
/* From here on the signal handlers have no child to kill. */
child_pid = 0;
it.it_value.tv_sec = 0;
it.it_value.tv_usec = 0;
setitimer(ITIMER_REAL, &it, NULL);
MEM_BARRIER();
/* Clean up bitmap, analyze exit condition, etc. */
if (*(u32*)trace_bits == EXEC_FAIL_SIG)
FATAL("Unable to execute '%s'", argv[0]);
classify_counts(trace_bits, binary_mode ?
count_class_binary : count_class_human);
if (!quiet_mode)
SAYF(cRST "-- Program output ends --\n");
/* A signal death only counts as a crash if we did not cause it ourselves
   via the timeout or a user-requested stop. */
if (!child_timed_out && !stop_soon && WIFSIGNALED(status))
child_crashed = 1;
if (!quiet_mode) {
if (child_timed_out)
SAYF(cLRD "\n+++ Program timed off +++\n" cRST);
else if (stop_soon)
SAYF(cLRD "\n+++ Program aborted by user +++\n" cRST);
else if (child_crashed)
SAYF(cLRD "\n+++ Program killed by signal %u +++\n" cRST, WTERMSIG(status));
}
}
/* Handler for SIGHUP / SIGINT / SIGTERM: remember that a stop was requested
   and, if a child is currently being tracked, kill it. */
static void handle_stop_sig(int sig) {

  stop_soon = 1;

  if (child_pid <= 0) return;

  kill(child_pid, SIGKILL);

}
/* Prepare the environment inherited by the target: supply default
   ASAN_OPTIONS / MSAN_OPTIONS (existing values are kept, since overwrite is
   0) and, when AFL_PRELOAD is set, copy it into both LD_PRELOAD and
   DYLD_INSERT_LIBRARIES. */
static void set_up_environment(void) {

  setenv("ASAN_OPTIONS", "abort_on_error=1:"
                         "detect_leaks=0:"
                         "symbolize=0:"
                         "allocator_may_return_null=1", 0);

  setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
                         "symbolize=0:"
                         "abort_on_error=1:"
                         "allocator_may_return_null=1:"
                         "msan_track_origins=0", 0);

  if (!getenv("AFL_PRELOAD")) return;

  setenv("LD_PRELOAD", getenv("AFL_PRELOAD"), 1);
  setenv("DYLD_INSERT_LIBRARIES", getenv("AFL_PRELOAD"), 1);

}
/* Setup signal handlers, duh. */
static void setup_signal_handlers(void) {
struct sigaction sa;
sa.sa_handler = NULL;
sa.sa_flags = SA_RESTART;
sa.sa_sigaction = NULL;
sigemptyset(&sa.sa_mask);
/* Various ways of saying "stop". */
sa.sa_handler = handle_stop_sig;
sigaction(SIGHUP, &sa, NULL);
sigaction(SIGINT, &sa, NULL);
sigaction(SIGTERM, &sa, NULL);
/* Exec timeout notifications. */
sa.sa_handler = handle_timeout;
sigaction(SIGALRM, &sa, NULL);
}
/* Scan the NULL-terminated argv for arguments containing "@@" and replace
   the first occurrence in each with the absolute form of at_file (the value
   given via -A). Finding "@@" while at_file is unset is fatal. Rewritten
   arguments are freshly allocated strings; the originals are left intact. */
static void detect_file_args(char** argv) {

  u8* work_dir = getcwd(NULL, 0);
  u32 idx;

  if (!work_dir) PFATAL("getcwd() failed");

  for (idx = 0; argv[idx]; idx++) {

    u8* marker = strstr(argv[idx], "@@");
    u8 *full_path, *rebuilt;

    if (!marker) continue;

    if (!at_file) FATAL("@@ syntax is not supported by this tool.");

    /* Be sure that we're always using fully-qualified paths. */

    if (at_file[0] == '/')
      full_path = at_file;
    else
      full_path = alloc_printf("%s/%s", work_dir, at_file);

    /* Split the argument at the marker, glue prefix + path + suffix together,
       then restore the original string. */

    *marker = 0;
    rebuilt = alloc_printf("%s%s%s", argv[idx], full_path, marker + 2);
    argv[idx] = rebuilt;
    *marker = '@';

    if (at_file[0] != '/') ck_free(full_path);

  }

  free(work_dir); /* not tracked */

}
/* Show banner: print the one-line "afl-showmap <VERSION>" identification
   string via SAYF(). */
static void show_banner(void) {
SAYF(cCYA "afl-showmap " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
}
/* Display usage hints: print the banner followed by the option summary, then
   terminate the process with exit status 1 (this function does not return).
   argv0 is the program name shown in the synopsis line. */
static void usage(u8* argv0) {
show_banner();
/* The three format arguments are, in order: program name, the default
   memory limit (MEM_LIMIT) and the documentation directory (doc_path). */
SAYF("\n%s [ options ] -- /path/to/target_app [ ... ]\n\n"
"Required parameters:\n\n"
" -o file - file to write the trace data to\n\n"
"Execution control settings:\n\n"
" -t msec - timeout for each run (none)\n"
" -m megs - memory limit for child process (%u MB)\n"
" -Q - use binary-only instrumentation (QEMU mode)\n\n"
"Other settings:\n\n"
" -q - sink program's output and don't show messages\n"
" -e - show edge coverage only, ignore hit counts\n"
" -c - allow core dumps\n"
" -V - show version number and exit\n\n"
"This tool displays raw tuple data captured by AFL instrumentation.\n"
"For additional help, consult %s/README.\n\n" cRST,
argv0, MEM_LIMIT, doc_path);
exit(1);
}
/* Find binary.

   Resolves fname to the program to run and stores the result in target_path.
   A name containing '/' (or any name when PATH is unset) is used verbatim;
   otherwise each PATH element is tried in turn. A candidate is accepted only
   if it is a regular file with at least one execute bit set and a size of at
   least 4 bytes. Not finding one is fatal. */
static void find_binary(u8* fname) {
u8* env_path = 0;
struct stat st;
if (strchr(fname, '/') || !(env_path = getenv("PATH"))) {
target_path = ck_strdup(fname);
if (stat(target_path, &st) || !S_ISREG(st.st_mode) ||
!(st.st_mode & 0111) || st.st_size < 4)
FATAL("Program '%s' not found or not executable", fname);
} else {
while (env_path) {
u8 *cur_elem, *delim = strchr(env_path, ':');
if (delim) {
/* Copy just this element. The extra byte is the NUL terminator.
   NOTE(review): relies on ck_alloc() returning zeroed memory - confirm. */
cur_elem = ck_alloc(delim - env_path + 1);
memcpy(cur_elem, env_path, delim - env_path);
delim++;
} else cur_elem = ck_strdup(env_path);
/* Advance to the next element; NULL after the last one ends the loop. */
env_path = delim;
/* An empty PATH element means the name is tried as given (relative to the
   current directory). */
if (cur_elem[0])
target_path = alloc_printf("%s/%s", cur_elem, fname);
else
target_path = ck_strdup(fname);
ck_free(cur_elem);
if (!stat(target_path, &st) && S_ISREG(st.st_mode) &&
(st.st_mode & 0111) && st.st_size >= 4) break;
ck_free(target_path);
target_path = 0;
}
if (!target_path) FATAL("Program '%s' not found or not executable", fname);
}
}
/* Fix up argv for QEMU.

   Builds a new argument vector of the form

     afl-qemu-trace -- <target_path> <original args after argv[0]...>

   and redirects target_path to the afl-qemu-trace binary, so that the tracer
   is what actually gets exec()'d.

   own_loc - argv[0] of this tool, used to look for a sibling afl-qemu-trace
   argv   - target command line (argv[0] is the target itself)
   argc   - number of entries in argv, not counting the terminating NULL

   afl-qemu-trace is searched for in $AFL_PATH (exclusively, when that is
   set), then next to own_loc, then in BIN_PATH. Returns the new vector;
   aborts via FATAL() if the tracer cannot be found. */
static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {

  char** new_argv = ck_alloc(sizeof(char*) * (argc + 4));
  u8 *tmp, *cp, *rsl, *own_copy;

  /* Workaround for a QEMU stability glitch. */

  setenv("QEMU_LOG", "nochain", 1);

  /* Copies argv[1] .. argv[argc]; the caller passes a slice of the process
     argv, so the last of these is its terminating NULL. */

  memcpy(new_argv + 3, argv + 1, sizeof(char*) * argc);

  new_argv[2] = target_path;
  new_argv[1] = "--";

  /* Now we need to actually find qemu for argv[0]. */

  tmp = getenv("AFL_PATH");

  if (tmp) {

    cp = alloc_printf("%s/afl-qemu-trace", tmp);

    /* Report the full path that was probed, not just the AFL_PATH dir. */

    if (access(cp, X_OK))
      FATAL("Unable to find '%s'", cp);

    target_path = new_argv[0] = cp;
    return new_argv;

  }

  own_copy = ck_strdup(own_loc);
  rsl = strrchr(own_copy, '/');

  if (rsl) {

    *rsl = 0;

    cp = alloc_printf("%s/afl-qemu-trace", own_copy);
    ck_free(own_copy);

    if (!access(cp, X_OK)) {

      target_path = new_argv[0] = cp;
      return new_argv;

    }

    /* Not there; release the candidate before trying the next location. */

    ck_free(cp);

  } else ck_free(own_copy);

  if (!access(BIN_PATH "/afl-qemu-trace", X_OK)) {

    target_path = new_argv[0] = BIN_PATH "/afl-qemu-trace";
    return new_argv;

  }

  FATAL("Unable to find 'afl-qemu-trace'.");

}
/* Main entry point.

   Parses the command line, sets up shared memory, signal handlers and the
   target environment, runs the target once and writes the resulting trace
   via write_results().

   Exit status: child_crashed * 2 + child_timed_out. */
int main(int argc, char** argv) {
s32 opt;
u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
u32 tcnt;
char** use_argv;
/* Use the installed docs location if it exists, else the relative "docs". */
doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;
while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQbcV")) > 0)
switch (opt) {
case 'o':
if (out_file) FATAL("Multiple -o options not supported");
out_file = optarg;
break;
case 'm': {
/* Memory limit: a number with an optional T / G / M / k suffix (default
   M), or the word "none". Stored in megabytes. */
u8 suffix = 'M';
if (mem_limit_given) FATAL("Multiple -m options not supported");
mem_limit_given = 1;
if (!strcmp(optarg, "none")) {
mem_limit = 0;
break;
}
if (sscanf(optarg, "%llu%c", &mem_limit, &suffix) < 1 ||
optarg[0] == '-') FATAL("Bad syntax used for -m");
switch (suffix) {
case 'T': mem_limit *= 1024 * 1024; break;
case 'G': mem_limit *= 1024; break;
case 'k': mem_limit /= 1024; break;
case 'M': break;
default: FATAL("Unsupported suffix or bad syntax for -m");
}
if (mem_limit < 5) FATAL("Dangerously low value of -m");
if (sizeof(rlim_t) == 4 && mem_limit > 2000)
FATAL("Value of -m out of range on 32-bit systems");
}
break;
case 't':
/* Timeout in milliseconds, or "none" to leave exec_tmout at 0. */
if (timeout_given) FATAL("Multiple -t options not supported");
timeout_given = 1;
if (strcmp(optarg, "none")) {
exec_tmout = atoi(optarg);
if (exec_tmout < 20 || optarg[0] == '-')
FATAL("Dangerously low value of -t");
}
break;
case 'e':
if (edges_only) FATAL("Multiple -e options not supported");
edges_only = 1;
break;
case 'q':
if (quiet_mode) FATAL("Multiple -q options not supported");
quiet_mode = 1;
break;
case 'Z':
/* This is an undocumented option to write data in the syntax expected
by afl-cmin. Nobody else should have any use for this. */
cmin_mode = 1;
quiet_mode = 1;
break;
case 'A':
/* Another afl-cmin specific feature. */
at_file = optarg;
break;
case 'Q':
if (qemu_mode) FATAL("Multiple -Q options not supported");
/* QEMU mode gets its own default memory limit unless -m was already
   seen earlier on the command line. */
if (!mem_limit_given) mem_limit = MEM_LIMIT_QEMU;
qemu_mode = 1;
break;
case 'b':
/* Secret undocumented mode. Writes output in raw binary format
similar to that dumped by afl-fuzz in <out_dir/queue/fuzz_bitmap. */
binary_mode = 1;
break;
case 'c':
if (keep_cores) FATAL("Multiple -c options not supported");
keep_cores = 1;
break;
case 'V':
show_banner();
exit(0);
default:
usage(argv[0]);
}
/* Both a target command and -o are mandatory. */
if (optind == argc || !out_file) usage(argv[0]);
setup_shm();
setup_signal_handlers();
set_up_environment();
find_binary(argv[optind]);
if (!quiet_mode) {
show_banner();
ACTF("Executing '%s'...\n", target_path);
}
detect_file_args(argv + optind);
/* In QEMU mode the target is wrapped in an afl-qemu-trace invocation. */
if (qemu_mode)
use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
else
use_argv = argv + optind;
run_target(use_argv);
tcnt = write_results();
/* The "no instrumentation" check is only performed when not in quiet mode. */
if (!quiet_mode) {
if (!tcnt) FATAL("No instrumentation detected" cRST);
OKF("Captured %u tuples in '%s'." cRST, tcnt, out_file);
}
exit(child_crashed * 2 + child_timed_out);
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,163 @@
#!/bin/sh
#
# american fuzzy lop - status check tool
# --------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
#
# Copyright 2015 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This tool summarizes the status of any locally-running synchronized
# instances of afl-fuzz.
#
# Banner first; it is shown even when the arguments turn out to be unusable.
echo "status check tool for afl-fuzz by <lcamtuf@google.com>"
echo

# An optional leading "-s" selects summary-only output; the sync directory is
# then the next positional argument.
case "$1" in
  -s)
    SUMMARY_ONLY=1
    DIR="$2"
    ;;
  *)
    unset SUMMARY_ONLY
    DIR="$1"
    ;;
esac

# No directory given: print the usage text on stderr and bail out.
if [ -z "$DIR" ]; then
  {
    echo "Usage: $0 [ -s ] afl_sync_dir"
    echo
    echo "The -s option causes the tool to skip all the per-fuzzer trivia and show"
    echo "just the summary results. See docs/parallel_fuzzing.txt for additional tips."
    echo
  } 1>&2
  exit 1
fi

# Everything below works with paths relative to the sync directory.
cd "$DIR" || exit 1

# A "queue" subdirectory directly inside means the user pointed us at a single
# fuzzer's output directory rather than at the parent sync directory.
if [ -d queue ]; then
  echo "[-] Error: parameter is an individual output directory, not a sync dir." 1>&2
  exit 1
fi
# Reference timestamp (seconds since the epoch) used to derive run times.
CUR_TIME=$(date +%s)

# Scratch file that receives each fuzzer_stats file rewritten as shell
# assignments. The second mktemp form is a fallback location
# (presumably for Android, where /data/local/tmp is writable - TODO confirm).
TMP=$(mktemp -t .afl-whatsup-XXXXXXXX) || TMP=$(mktemp -p /data/local/tmp .afl-whatsup-XXXXXXXX) || exit 1

# Make sure the scratch file does not outlive the script, whichever way we
# leave (normal exit, fatal shell error, or an interrupting signal).
trap 'rm -f "$TMP"' EXIT
trap 'exit 1' HUP INT TERM

# Running totals accumulated over all live fuzzer instances.
ALIVE_CNT=0
DEAD_CNT=0
TOTAL_TIME=0
TOTAL_EXECS=0
TOTAL_EPS=0
TOTAL_CRASHES=0
TOTAL_PFAV=0
TOTAL_PENDING=0

# Section header for the per-fuzzer details (suppressed by -s).
if [ "$SUMMARY_ONLY" = "" ]; then
  echo "Individual fuzzers"
  echo "=================="
  echo
fi
# Visit every fuzzer_stats file found at most two levels below the sync
# directory, in sorted order, and fold its numbers into the running totals.
# NOTE: the list is word-split by the shell, so instance directory names
# containing whitespace are not supported.
for i in $(find . -maxdepth 2 -iname fuzzer_stats | sort); do

  # Rewrite "key : value" lines as key="value" shell assignments (the
  # command_line entry is replaced with a dummy _skip key) and load them into
  # the current shell.
  sed 's/^command_line.*$/_skip:1/;s/[ ]*:[ ]*/="/;s/$/"/' "$i" >"$TMP"
  . "$TMP"

  RUN_UNIX=$((CUR_TIME - start_time))
  RUN_DAYS=$((RUN_UNIX / 60 / 60 / 24))
  RUN_HRS=$(((RUN_UNIX / 60 / 60) % 24))

  if [ "$SUMMARY_ONLY" = "" ]; then
    echo ">>> $afl_banner ($RUN_DAYS days, $RUN_HRS hrs) <<<"
    echo
  fi

  # Signal 0 only probes whether the recorded PID exists and is signalable.
  if ! kill -0 "$fuzzer_pid" 2>/dev/null; then

    if [ "$SUMMARY_ONLY" = "" ]; then
      echo " Instance is dead or running remotely, skipping."
      echo
    fi

    DEAD_CNT=$((DEAD_CNT + 1))
    continue

  fi

  ALIVE_CNT=$((ALIVE_CNT + 1))

  # A fuzzer started within the current second has a run time of zero; a
  # division by zero is a fatal error in shell arithmetic, so report a speed
  # of 0 instead of aborting the whole report.
  if [ "$RUN_UNIX" -ne 0 ]; then
    EXEC_SEC=$((execs_done / RUN_UNIX))
  else
    EXEC_SEC=0
  fi

  # Same protection for an instance that has not recorded any paths yet (or
  # whose stats file lacks the field).
  if [ "${paths_total:-0}" -ne 0 ] 2>/dev/null; then
    PATH_PERC=$((cur_path * 100 / paths_total))
  else
    PATH_PERC=0
  fi

  TOTAL_TIME=$((TOTAL_TIME + RUN_UNIX))
  TOTAL_EPS=$((TOTAL_EPS + EXEC_SEC))
  TOTAL_EXECS=$((TOTAL_EXECS + execs_done))
  TOTAL_CRASHES=$((TOTAL_CRASHES + unique_crashes))
  TOTAL_PENDING=$((TOTAL_PENDING + pending_total))
  TOTAL_PFAV=$((TOTAL_PFAV + pending_favs))

  if [ "$SUMMARY_ONLY" = "" ]; then

    echo " cycle $((cycles_done + 1)), lifetime speed $EXEC_SEC execs/sec, path $cur_path/$paths_total (${PATH_PERC}%)"

    if [ "$unique_crashes" = "0" ]; then
      echo " pending $pending_favs/$pending_total, coverage $bitmap_cvg, no crashes yet"
    else
      echo " pending $pending_favs/$pending_total, coverage $bitmap_cvg, crash count $unique_crashes (!)"
    fi

    echo

  fi

done
# All stats files have been read; the scratch file is no longer needed.
rm -f "$TMP"

# Break the accumulated run time (seconds) into whole days plus leftover hours.
TOTAL_HRS=$(((TOTAL_TIME / 60 / 60) % 24))
TOTAL_DAYS=$((TOTAL_TIME / 60 / 60 / 24))

# Never leave the total at zero.
if [ "$TOTAL_TIME" = "0" ]; then
  TOTAL_TIME=1
fi

printf '%s\n' "Summary stats" "=============" ""

printf '%s\n' " Fuzzers alive : $ALIVE_CNT"

# Only mention skipped instances when there were any.
if [ "$DEAD_CNT" != "0" ]; then
  printf '%s\n' " Dead or remote : $DEAD_CNT (excluded from stats)"
fi

printf '%s\n' " Total run time : $TOTAL_DAYS days, $TOTAL_HRS hours"
printf '%s\n' " Total execs : $((TOTAL_EXECS / 1000 / 1000)) million"
printf '%s\n' " Cumulative speed : $TOTAL_EPS execs/sec"
printf '%s\n' " Pending paths : $TOTAL_PFAV faves, $TOTAL_PENDING total"

# Per-fuzzer averages are only printed when more than one instance is alive.
if [ "$ALIVE_CNT" -gt "1" ]; then
  printf '%s\n' " Pending per fuzzer : $((TOTAL_PFAV/ALIVE_CNT)) faves, $((TOTAL_PENDING/ALIVE_CNT)) total (on average)"
fi

printf '%s\n' " Crashes found : $TOTAL_CRASHES locally unique"
printf '\n'

exit 0

@ -0,0 +1,577 @@
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - error-checking, memory-zeroing alloc routines
------------------------------------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
This allocator is not designed to resist malicious attackers (the canaries
are small and predictable), but provides a robust and portable way to detect
use-after-free, off-by-one writes, stale pointers, and so on.
*/
#ifndef _HAVE_ALLOC_INL_H
#define _HAVE_ALLOC_INL_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "types.h"
#include "debug.h"
/* User-facing macro to sprintf() to a dynamically allocated buffer.

   The format/argument list is expanded twice: once in snprintf(NULL, 0, ...)
   to measure the output, and once to render it into a buffer of _len + 1
   bytes obtained from ck_alloc(). Arguments should therefore be free of side
   effects. Evaluates to the u8* buffer; calls FATAL() if the measuring
   snprintf() reports an error. Relies on the GNU statement-expression
   extension. */
#define alloc_printf(_str...) ({ \
u8* _tmp; \
s32 _len = snprintf(NULL, 0, _str); \
if (_len < 0) FATAL("Whoa, snprintf() fails?!"); \
_tmp = ck_alloc(_len + 1); \
snprintf((char*)_tmp, _len + 1, _str); \
_tmp; \
})
/* Macro to enforce allocation limits as a last-resort defense against
integer overflows. Calls ABORT() when the requested size _s exceeds
MAX_ALLOC; _s is printed with %u. */
#define ALLOC_CHECK_SIZE(_s) do { \
if ((_s) > MAX_ALLOC) \
ABORT("Bad alloc request: %u bytes", (_s)); \
} while (0)
/* Macro to check malloc() failures and the like. Calls ABORT() when the
returned pointer _r is NULL; _s is the requested size, used only for the
message. */
#define ALLOC_CHECK_RESULT(_r, _s) do { \
if (!(_r)) \
ABORT("Out of memory: can't allocate %u bytes", (_s)); \
} while (0)
/* Magic tokens used to mark used / freed chunks. */
#define ALLOC_MAGIC_C1 0xFF00FF00 /* Used head (dword) */
#define ALLOC_MAGIC_F 0xFE00FE00 /* Freed head (dword) */
#define ALLOC_MAGIC_C2 0xF0 /* Used tail (byte) */
/* Positions of guard tokens in relation to the user-visible pointer.
Viewing the memory just before the user pointer as an array of u32, the head
canary is element [-2] and the recorded payload size is element [-1]; the
tail canary is the single byte at offset ALLOC_S(_ptr) from the user
pointer, i.e. immediately after the payload. */
#define ALLOC_C1(_ptr) (((u32*)(_ptr))[-2])
#define ALLOC_S(_ptr) (((u32*)(_ptr))[-1])
#define ALLOC_C2(_ptr) (((u8*)(_ptr))[ALLOC_S(_ptr)])
/* Bytes of bookkeeping before the user pointer (the two u32 fields above,
assuming a 4-byte u32), and total bookkeeping overhead including the
one-byte tail canary. */
#define ALLOC_OFF_HEAD 8
#define ALLOC_OFF_TOTAL (ALLOC_OFF_HEAD + 1)
/* Allocator increments for ck_realloc_block(). */
#define ALLOC_BLK_INC 256
/* Sanity-checking macros for pointers. CHECK_PTR() accepts NULL silently; for
any other pointer it verifies the head canary (distinguishing an already-freed
chunk from a corrupted one) and then the tail canary, calling ABORT() on any
mismatch. */
#define CHECK_PTR(_p) do { \
if (_p) { \
if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\
if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \
ABORT("Use after free."); \
else ABORT("Corrupted head alloc canary."); \
} \
if (ALLOC_C2(_p) ^ ALLOC_MAGIC_C2) \
ABORT("Corrupted tail alloc canary."); \
} \
} while (0)
/* Expression form of CHECK_PTR(): evaluates _p exactly once, checks it, and
yields its value. Uses the GNU typeof and statement-expression extensions. */
#define CHECK_PTR_EXPR(_p) ({ \
typeof (_p) _tmp = (_p); \
CHECK_PTR(_tmp); \
_tmp; \
})
/* Hand out `size` bytes of uninitialized memory framed by the allocator's
   head canary, size field and tail canary. A zero-sized request yields NULL;
   an oversized request or a malloc() failure aborts the program. */
static inline void* DFL_ck_alloc_nozero(u32 size) {

  u8* chunk;
  void* user;

  if (size == 0) return NULL;

  ALLOC_CHECK_SIZE(size);

  chunk = malloc(size + ALLOC_OFF_TOTAL);
  ALLOC_CHECK_RESULT(chunk, size);

  /* The caller-visible pointer starts right after the bookkeeping header. */
  user = chunk + ALLOC_OFF_HEAD;

  ALLOC_C1(user) = ALLOC_MAGIC_C1;
  ALLOC_S(user) = size;
  ALLOC_C2(user) = ALLOC_MAGIC_C2;

  return user;

}
/* Zero-filled counterpart of DFL_ck_alloc_nozero(). Returns NULL when asked
   for zero bytes. */
static inline void* DFL_ck_alloc(u32 size) {

  if (size) {
    void* buf = DFL_ck_alloc_nozero(size);
    return memset(buf, 0, size);
  }

  return NULL;

}
/* Free memory, checking for double free and corrupted heap. When DEBUG_BUILD
is set, the old memory will be also clobbered with 0xFF. */
static inline void DFL_ck_free(void* mem) {
if (!mem) return;
CHECK_PTR(mem);
#ifdef DEBUG_BUILD
/* Catch pointer issues sooner. */
memset(mem, 0xFF, ALLOC_S(mem));
#endif /* DEBUG_BUILD */
ALLOC_C1(mem) = ALLOC_MAGIC_F;
free(mem - ALLOC_OFF_HEAD);
}
/* Re-allocate a buffer, checking for issues and zeroing any newly-added tail.
With DEBUG_BUILD, the buffer is always reallocated to a new address and the
old memory is clobbered with 0xFF.

A zero `size` frees `orig` and returns NULL; a NULL `orig` behaves like a
fresh zeroed allocation. Oversized requests and allocation failures abort. */
static inline void* DFL_ck_realloc(void* orig, u32 size) {
void* ret;
u32 old_size = 0;
if (!size) {
DFL_ck_free(orig);
return NULL;
}
if (orig) {
CHECK_PTR(orig);
#ifndef DEBUG_BUILD
/* Mark the old head as freed before handing the chunk to realloc(); the
canary is rewritten below on whatever address comes back. */
ALLOC_C1(orig) = ALLOC_MAGIC_F;
#endif /* !DEBUG_BUILD */
old_size = ALLOC_S(orig);
/* From here on, orig points at the start of the underlying libc chunk. */
orig -= ALLOC_OFF_HEAD;
ALLOC_CHECK_SIZE(old_size);
}
ALLOC_CHECK_SIZE(size);
#ifndef DEBUG_BUILD
ret = realloc(orig, size + ALLOC_OFF_TOTAL);
ALLOC_CHECK_RESULT(ret, size);
#else
/* Catch pointer issues sooner: force relocation and make sure that the
original buffer is wiped. */
ret = malloc(size + ALLOC_OFF_TOTAL);
ALLOC_CHECK_RESULT(ret, size);
if (orig) {
memcpy(ret + ALLOC_OFF_HEAD, orig + ALLOC_OFF_HEAD, MIN(size, old_size));
memset(orig + ALLOC_OFF_HEAD, 0xFF, old_size);
ALLOC_C1(orig + ALLOC_OFF_HEAD) = ALLOC_MAGIC_F;
free(orig);
}
#endif /* ^!DEBUG_BUILD */
/* Switch to the user-visible pointer and (re)write the guard fields. */
ret += ALLOC_OFF_HEAD;
ALLOC_C1(ret) = ALLOC_MAGIC_C1;
ALLOC_S(ret) = size;
ALLOC_C2(ret) = ALLOC_MAGIC_C2;
/* Zero only the bytes beyond the previous payload (all of it when orig was
NULL, since old_size is then 0). */
if (size > old_size)
memset(ret + old_size, 0, size - old_size);
return ret;
}
/* Re-allocate a buffer with ALLOC_BLK_INC increments (used to speed up
   repeated small reallocs without complicating the user code).

   If `orig` is already large enough it is returned unchanged; otherwise the
   request is padded by ALLOC_BLK_INC and passed to DFL_ck_realloc(). With
   DEBUG_BUILD, or when `orig` is NULL, the request goes to DFL_ck_realloc()
   unpadded. */
static inline void* DFL_ck_realloc_block(void* orig, u32 size) {

#ifndef DEBUG_BUILD

  if (orig) {

    CHECK_PTR(orig);

    if (ALLOC_S(orig) >= size) return orig;

    /* Validate the request before padding it: for sizes close to the u32
       limit the addition below would wrap around to a small value that
       passes the limit check in DFL_ck_realloc(), handing back a buffer
       smaller than the caller asked for. */
    ALLOC_CHECK_SIZE(size);

    size += ALLOC_BLK_INC;

  }

#endif /* !DEBUG_BUILD */

  return DFL_ck_realloc(orig, size);

}
/* Duplicate a NUL-terminated string into a freshly allocated, canary-guarded
   buffer (terminator included). A NULL input yields NULL. */
static inline u8* DFL_ck_strdup(u8* str) {

  u8* chunk;
  void* copy;
  u32 bytes;

  if (str == NULL) return NULL;

  /* Length including the terminating NUL. */
  bytes = strlen((char*)str) + 1;

  ALLOC_CHECK_SIZE(bytes);

  chunk = malloc(bytes + ALLOC_OFF_TOTAL);
  ALLOC_CHECK_RESULT(chunk, bytes);

  copy = chunk + ALLOC_OFF_HEAD;

  ALLOC_C1(copy) = ALLOC_MAGIC_C1;
  ALLOC_S(copy) = bytes;
  ALLOC_C2(copy) = ALLOC_MAGIC_C2;

  return memcpy(copy, str, bytes);

}
/* Duplicate `size` bytes starting at `mem` into a freshly allocated,
   canary-guarded buffer. Yields NULL when `mem` is NULL or `size` is zero. */
static inline void* DFL_ck_memdup(void* mem, u32 size) {

  u8* chunk;
  void* copy;

  if (mem == NULL || size == 0) return NULL;

  ALLOC_CHECK_SIZE(size);

  chunk = malloc(size + ALLOC_OFF_TOTAL);
  ALLOC_CHECK_RESULT(chunk, size);

  copy = chunk + ALLOC_OFF_HEAD;

  ALLOC_C1(copy) = ALLOC_MAGIC_C1;
  ALLOC_S(copy) = size;
  ALLOC_C2(copy) = ALLOC_MAGIC_C2;

  return memcpy(copy, mem, size);

}
/* Create a buffer with a block of text, appending a NUL terminator at the end.
   Returns NULL for zero-sized or NULL inputs.

   The recorded chunk size covers the terminator (size + 1), so the tail
   canary sits after the NUL byte. Recording only `size` would place the
   canary at ret[size], where the terminator then overwrites it, making every
   later CHECK_PTR() on the buffer (e.g. on free or realloc) abort. */
static inline u8* DFL_ck_memdup_str(u8* mem, u32 size) {

  u8* ret;
  u32 stored;

  if (!mem || !size) return NULL;

  ALLOC_CHECK_SIZE(size);

  /* Payload plus terminator; cannot wrap because size <= MAX_ALLOC here. */
  stored = size + 1;

  ret = malloc(stored + ALLOC_OFF_TOTAL);
  ALLOC_CHECK_RESULT(ret, size);

  ret += ALLOC_OFF_HEAD;

  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
  ALLOC_S(ret) = stored;
  ALLOC_C2(ret) = ALLOC_MAGIC_C2;

  memcpy(ret, mem, size);
  ret[size] = 0;

  return ret;

}
#ifndef DEBUG_BUILD
/* In non-debug mode, we just do straightforward aliasing of the above functions
to user-visible names such as ck_alloc(). alloc_report() expands to nothing. */
#define ck_alloc DFL_ck_alloc
#define ck_alloc_nozero DFL_ck_alloc_nozero
#define ck_realloc DFL_ck_realloc
#define ck_realloc_block DFL_ck_realloc_block
#define ck_strdup DFL_ck_strdup
#define ck_memdup DFL_ck_memdup
#define ck_memdup_str DFL_ck_memdup_str
#define ck_free DFL_ck_free
#define alloc_report()
#else
/* In debugging mode, we also track allocations to detect memory leaks, and the
flow goes through one more layer of indirection. */
/* Alloc tracking data structures: */
/* Number of hash buckets that live pointers are spread across. */
#define ALLOC_BUCKETS 4096
/* One tracked allocation: the user pointer plus the call site that created
it. A NULL ptr marks a slot that is free for reuse. */
struct TRK_obj {
void *ptr;
char *file, *func;
u32 line;
};
/* The translation unit that defines AFL_MAIN owns the tables and gets a
working alloc_report(); every other unit only sees extern declarations and a
no-op alloc_report(). */
#ifdef AFL_MAIN
struct TRK_obj* TRK[ALLOC_BUCKETS];
u32 TRK_cnt[ALLOC_BUCKETS];
# define alloc_report() TRK_report()
#else
extern struct TRK_obj* TRK[ALLOC_BUCKETS];
extern u32 TRK_cnt[ALLOC_BUCKETS];
# define alloc_report()
#endif /* ^AFL_MAIN */
/* Bucket-assigning function for a given pointer: the pointer is cast to u32
(NOTE(review): this discards the upper bits where pointers are wider than
u32), its upper 16 bits are XORed into it, and the result is reduced modulo
ALLOC_BUCKETS. */
#define TRKH(_ptr) (((((u32)(_ptr)) >> 16) ^ ((u32)(_ptr))) % ALLOC_BUCKETS)
/* Record `ptr` and its originating call site in the tracking table. A NULL
   pointer is ignored. A vacated slot in the pointer's bucket is reused when
   one exists; otherwise the bucket is grown by one entry. */
static inline void TRK_alloc_buf(void* ptr, const char* file, const char* func,
                                 u32 line) {

  u32 slot, b;
  struct TRK_obj* ent;

  if (!ptr) return;

  b = TRKH(ptr);

  /* Look for the first unused slot in this bucket. */

  for (slot = 0; slot < TRK_cnt[b]; slot++)
    if (!TRK[b][slot].ptr) break;

  /* None found - append a new slot at the end of the bucket. */

  if (slot == TRK_cnt[b]) {

    TRK[b] = DFL_ck_realloc_block(TRK[b],
      (TRK_cnt[b] + 1) * sizeof(struct TRK_obj));

    TRK_cnt[b]++;

  }

  ent = &TRK[b][slot];

  ent->ptr = ptr;
  ent->file = (char*)file;
  ent->func = (char*)func;
  ent->line = line;

}
/* Drop `ptr` from the tracking table. A NULL pointer is ignored; a pointer
   that is not being tracked triggers a warning naming the offending call
   site. */
static inline void TRK_free_buf(void* ptr, const char* file, const char* func,
                                u32 line) {

  u32 idx, b;

  if (!ptr) return;

  b = TRKH(ptr);

  /* Scan the bucket and vacate the first matching slot. */

  for (idx = 0; idx < TRK_cnt[b]; idx++) {

    if (TRK[b][idx].ptr != ptr) continue;

    TRK[b][idx].ptr = 0;
    return;

  }

  WARNF("ALLOC: Attempt to free non-allocated memory in %s (%s:%u)",
        func, file, line);

}
/* Do a final report on all non-deallocated objects. */
static inline void TRK_report(void) {
u32 i, bucket;
fflush(0);
for (bucket = 0; bucket < ALLOC_BUCKETS; bucket++)
for (i = 0; i < TRK_cnt[bucket]; i++)
if (TRK[bucket][i].ptr)
WARNF("ALLOC: Memory never freed, created in %s (%s:%u)",
TRK[bucket][i].func, TRK[bucket][i].file, TRK[bucket][i].line);
}
/* Simple wrappers for non-debugging functions: each one forwards to the
matching DFL_ck_* routine and updates the tracking table with the caller's
file, function and line. */
/* Zeroed allocation; the result is registered in the tracking table. */
static inline void* TRK_ck_alloc(u32 size, const char* file, const char* func,
u32 line) {
void* ret = DFL_ck_alloc(size);
TRK_alloc_buf(ret, file, func, line);
return ret;
}
/* Reallocation: the old pointer is unregistered and the returned one
registered. */
static inline void* TRK_ck_realloc(void* orig, u32 size, const char* file,
const char* func, u32 line) {
void* ret = DFL_ck_realloc(orig, size);
TRK_free_buf(orig, file, func, line);
TRK_alloc_buf(ret, file, func, line);
return ret;
}
/* Block-wise reallocation, tracked the same way as TRK_ck_realloc(). */
static inline void* TRK_ck_realloc_block(void* orig, u32 size, const char* file,
const char* func, u32 line) {
void* ret = DFL_ck_realloc_block(orig, size);
TRK_free_buf(orig, file, func, line);
TRK_alloc_buf(ret, file, func, line);
return ret;
}
/* String duplication; the copy is registered in the tracking table. */
static inline void* TRK_ck_strdup(u8* str, const char* file, const char* func,
u32 line) {
void* ret = DFL_ck_strdup(str);
TRK_alloc_buf(ret, file, func, line);
return ret;
}
/* Memory block duplication; the copy is registered in the tracking table. */
static inline void* TRK_ck_memdup(void* mem, u32 size, const char* file,
const char* func, u32 line) {
void* ret = DFL_ck_memdup(mem, size);
TRK_alloc_buf(ret, file, func, line);
return ret;
}
/* Duplication with an appended NUL; the copy is registered in the tracking
table. */
static inline void* TRK_ck_memdup_str(void* mem, u32 size, const char* file,
const char* func, u32 line) {
void* ret = DFL_ck_memdup_str(mem, size);
TRK_alloc_buf(ret, file, func, line);
return ret;
}
/* Unregisters the pointer first, then releases it via DFL_ck_free(). */
static inline void TRK_ck_free(void* ptr, const char* file,
const char* func, u32 line) {
TRK_free_buf(ptr, file, func, line);
DFL_ck_free(ptr);
}
/* Aliasing user-facing names to tracking functions: every call passes the
caller's __FILE__, __FUNCTION__ and __LINE__ along so that leak reports can
name the allocation site. Note that ck_alloc_nozero() maps to TRK_ck_alloc()
here, so in debug builds it returns zeroed memory as well. */
#define ck_alloc(_p1) \
TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__)
#define ck_alloc_nozero(_p1) \
TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__)
#define ck_realloc(_p1, _p2) \
TRK_ck_realloc(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
#define ck_realloc_block(_p1, _p2) \
TRK_ck_realloc_block(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
#define ck_strdup(_p1) \
TRK_ck_strdup(_p1, __FILE__, __FUNCTION__, __LINE__)
#define ck_memdup(_p1, _p2) \
TRK_ck_memdup(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
#define ck_memdup_str(_p1, _p2) \
TRK_ck_memdup_str(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
#define ck_free(_p1) \
TRK_ck_free(_p1, __FILE__, __FUNCTION__, __LINE__)
#endif /* ^!DEBUG_BUILD */
#endif /* ! _HAVE_ALLOC_INL_H */

@ -0,0 +1,82 @@
#ifdef __ANDROID__
#ifndef _ANDROID_ASHMEM_H
#define _ANDROID_ASHMEM_H
#include <fcntl.h>
#include <linux/ashmem.h>
#include <linux/shm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#if __ANDROID_API__ >= 26
#define shmat bionic_shmat
#define shmctl bionic_shmctl
#define shmdt bionic_shmdt
#define shmget bionic_shmget
#endif
#include <sys/shm.h>
#undef shmat
#undef shmctl
#undef shmdt
#undef shmget
#include <stdio.h>
#define ASHMEM_DEVICE "/dev/ashmem"
static inline int shmctl(int __shmid, int __cmd, struct shmid_ds *__buf) {
int ret = 0;
if (__cmd == IPC_RMID) {
int length = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
struct ashmem_pin pin = {0, length};
ret = ioctl(__shmid, ASHMEM_UNPIN, &pin);
close(__shmid);
}
return ret;
}
/* ashmem-backed stand-in for shmget(). Opens ASHMEM_DEVICE, names the region
   after the decimal value of __key and sizes it to __size bytes. Returns the
   open file descriptor (which serves as the "shm id" for the sibling shims)
   or a negative value on failure. __shmflg is ignored. */
static inline int shmget(key_t __key, size_t __size, int __shmflg) {

  (void) __shmflg;

  int fd, ret;

  /* Large enough for the longest 32-bit value ("-2147483648") plus the
     terminating NUL; an 11-byte buffer would overflow by one for such keys. */
  char ourkey[12];

  fd = open(ASHMEM_DEVICE, O_RDWR);
  if (fd < 0)
    return fd;

  /* Bounded formatting so the buffer can never be overrun, whatever the
     width of key_t turns out to be. */
  snprintf(ourkey, sizeof(ourkey), "%d", __key);

  ret = ioctl(fd, ASHMEM_SET_NAME, ourkey);
  if (ret < 0)
    goto error;

  ret = ioctl(fd, ASHMEM_SET_SIZE, __size);
  if (ret < 0)
    goto error;

  return fd;

error:
  /* Do not leak the descriptor when the region could not be set up. */
  close(fd);
  return ret;

}
/* ashmem-backed stand-in for shmat(). Maps the whole region behind descriptor
   __shmid read/write and shared, and returns its address. __shmaddr and
   __shmflg are ignored.

   On failure this returns (void *) -1, matching the System V shmat()
   contract: callers written against the real API test for that value, so a
   NULL return would go unnoticed and be dereferenced later. */
static inline void *shmat(int __shmid, const void *__shmaddr, int __shmflg) {

  (void) __shmaddr;
  (void) __shmflg;

  int size;

  size = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
  if (size < 0) {
    return (void *) -1;
  }

  /* mmap() reports failure as MAP_FAILED, which is (void *) -1 as well, so
     its result can be handed back as is. */
  return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, __shmid, 0);

}
#endif /* !_ANDROID_ASHMEM_H */
#endif /* !__ANDROID__ */

@ -0,0 +1,362 @@
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - vaguely configurable bits
----------------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
*/
#ifndef _HAVE_CONFIG_H
#define _HAVE_CONFIG_H
#include "types.h"
/* Version string: */
#define VERSION "2.57b"
/******************************************************
* *
* Settings that may be of interest to power users: *
* *
******************************************************/
/* Comment out to disable terminal colors (note that this makes afl-analyze
a lot less nice): */
#define USE_COLOR
/* Comment out to disable fancy ANSI boxes and use poor man's 7-bit UI: */
#define FANCY_BOXES
/* Default timeout for fuzzed code (milliseconds). This is the upper bound,
also used for detecting hangs; the actual value is auto-scaled: */
#define EXEC_TIMEOUT 1000
/* Timeout rounding factor when auto-scaling (milliseconds): */
#define EXEC_TM_ROUND 20
/* Define WORD_SIZE_64 when building for a known 64-bit architecture
(x86-64 or 64-bit ARM): */
#if (defined (__x86_64__) || defined (__arm64__) || defined (__aarch64__))
#define WORD_SIZE_64 1
#endif
/* Default memory limit for child process (MB): */
#ifndef WORD_SIZE_64
# define MEM_LIMIT 25
#else
# define MEM_LIMIT 50
#endif /* ^!WORD_SIZE_64 */
/* Default memory limit when running in QEMU mode (MB): */
#define MEM_LIMIT_QEMU 200
/* Number of calibration cycles per every new test case (and for test
cases that show variable behavior): */
#define CAL_CYCLES 8
#define CAL_CYCLES_LONG 40
/* Number of subsequent timeouts before abandoning an input file: */
#define TMOUT_LIMIT 250
/* Maximum number of unique hangs or crashes to record: */
#define KEEP_UNIQUE_HANG 500
#define KEEP_UNIQUE_CRASH 5000
/* Baseline number of random tweaks during a single 'havoc' stage: */
#define HAVOC_CYCLES 256
#define HAVOC_CYCLES_INIT 1024
/* Maximum multiplier for the above (should be a power of two, beware
of 32-bit int overflows): */
#define HAVOC_MAX_MULT 16
/* Absolute minimum number of havoc cycles (after all adjustments): */
#define HAVOC_MIN 16
/* Maximum stacking for havoc-stage tweaks. The actual value is calculated
like this:
n = random between 1 and HAVOC_STACK_POW2
stacking = 2^n
In other words, the default (n = 7) produces 2, 4, 8, 16, 32, 64, or
128 stacked tweaks: */
#define HAVOC_STACK_POW2 7
/* Caps on block sizes for cloning and deletion operations. Each of these
ranges has a 33% probability of getting picked, except for the first
two cycles where smaller blocks are favored: */
#define HAVOC_BLK_SMALL 32
#define HAVOC_BLK_MEDIUM 128
#define HAVOC_BLK_LARGE 1500
/* Extra-large blocks, selected very rarely (<5% of the time): */
#define HAVOC_BLK_XL 32768
/* Probabilities of skipping non-favored entries in the queue, expressed as
percentages: */
#define SKIP_TO_NEW_PROB 99 /* ...when there are new, pending favorites */
#define SKIP_NFAV_OLD_PROB 95 /* ...no new favs, cur entry already fuzzed */
#define SKIP_NFAV_NEW_PROB 75 /* ...no new favs, cur entry not fuzzed yet */
/* Splicing cycle count: */
#define SPLICE_CYCLES 15
/* Nominal per-splice havoc cycle length: */
#define SPLICE_HAVOC 32
/* Maximum offset for integer addition / subtraction stages: */
#define ARITH_MAX 35
/* Limits for the test case trimmer. The absolute minimum chunk size; and
the starting and ending divisors for chopping up the input file: */
#define TRIM_MIN_BYTES 4
#define TRIM_START_STEPS 16
#define TRIM_END_STEPS 1024
/* Maximum size of input file, in bytes (keep under 100MB): */
#define MAX_FILE (1 * 1024 * 1024)
/* The same, for the test case minimizer: */
#define TMIN_MAX_FILE (10 * 1024 * 1024)
/* Block normalization steps for afl-tmin: */
#define TMIN_SET_MIN_SIZE 4
#define TMIN_SET_STEPS 128
/* Maximum dictionary token size (-x), in bytes: */
#define MAX_DICT_FILE 128
/* Length limits for auto-detected dictionary tokens: */
#define MIN_AUTO_EXTRA 3
#define MAX_AUTO_EXTRA 32
/* Maximum number of user-specified dictionary tokens to use in deterministic
steps; past this point, the "extras/user" step will be still carried out,
but with proportionally lower odds: */
#define MAX_DET_EXTRAS 200
/* Maximum number of auto-extracted dictionary tokens to actually use in fuzzing
(first value), and to keep in memory as candidates. The latter should be much
higher than the former. */
#define USE_AUTO_EXTRAS 50
#define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 10)
/* Scaling factor for the effector map used to skip some of the more
expensive deterministic steps. The actual divisor is set to
2^EFF_MAP_SCALE2 bytes: */
#define EFF_MAP_SCALE2 3
/* Minimum input file length at which the effector logic kicks in: */
#define EFF_MIN_LEN 128
/* Maximum effector density past which everything is just fuzzed
unconditionally (%): */
#define EFF_MAX_PERC 90
/* UI refresh frequency (Hz): */
#define UI_TARGET_HZ 5
/* Fuzzer stats file and plot update intervals (sec): */
#define STATS_UPDATE_SEC 60
#define PLOT_UPDATE_SEC 5
/* Smoothing divisor for CPU load and exec speed stats (1 - no smoothing). */
#define AVG_SMOOTHING 16
/* Sync interval (every n havoc cycles): */
#define SYNC_INTERVAL 5
/* Output directory reuse grace period (minutes): */
#define OUTPUT_GRACE 25
/* Uncomment to use simple file names (id_NNNNNN): */
// #define SIMPLE_FILES
/* List of interesting values to use in fuzzing. */
#define INTERESTING_8 \
-128, /* Overflow signed 8-bit when decremented */ \
-1, /* */ \
0, /* */ \
1, /* */ \
16, /* One-off with common buffer size */ \
32, /* One-off with common buffer size */ \
64, /* One-off with common buffer size */ \
100, /* One-off with common buffer size */ \
127 /* Overflow signed 8-bit when incremented */
#define INTERESTING_16 \
-32768, /* Overflow signed 16-bit when decremented */ \
-129, /* Overflow signed 8-bit */ \
128, /* Overflow signed 8-bit */ \
255, /* Overflow unsig 8-bit when incremented */ \
256, /* Overflow unsig 8-bit */ \
512, /* One-off with common buffer size */ \
1000, /* One-off with common buffer size */ \
1024, /* One-off with common buffer size */ \
4096, /* One-off with common buffer size */ \
32767 /* Overflow signed 16-bit when incremented */
#define INTERESTING_32 \
-2147483648LL, /* Overflow signed 32-bit when decremented */ \
-100663046, /* Large negative number (endian-agnostic) */ \
-32769, /* Overflow signed 16-bit */ \
32768, /* Overflow signed 16-bit */ \
65535, /* Overflow unsig 16-bit when incremented */ \
65536, /* Overflow unsig 16 bit */ \
100663045, /* Large positive number (endian-agnostic) */ \
2147483647 /* Overflow signed 32-bit when incremented */
/***********************************************************
* *
* Really exotic stuff you probably don't want to touch: *
* *
***********************************************************/
/* Call count interval between reseeding the libc PRNG from /dev/urandom: */
#define RESEED_RNG 10000
/* Maximum line length passed from GCC to 'as' and used for parsing
configuration files: */
#define MAX_LINE 8192
/* Environment variable used to pass SHM ID to the called program. */
#define SHM_ENV_VAR "__AFL_SHM_ID"
/* Other less interesting, internal-only variables. */
#define CLANG_ENV_VAR "__AFL_CLANG_MODE"
#define AS_LOOP_ENV_VAR "__AFL_AS_LOOPCHECK"
#define PERSIST_ENV_VAR "__AFL_PERSISTENT"
#define DEFER_ENV_VAR "__AFL_DEFER_FORKSRV"
/* In-code signatures for deferred and persistent mode. */
#define PERSIST_SIG "##SIG_AFL_PERSISTENT##"
#define DEFER_SIG "##SIG_AFL_DEFER_FORKSRV##"
/* Distinctive bitmap signature used to indicate failed execution: */
#define EXEC_FAIL_SIG 0xfee1dead
/* Distinctive exit code used to indicate MSAN trip condition: */
#define MSAN_ERROR 86
/* Designated file descriptors for forkserver commands (the application will
use FORKSRV_FD and FORKSRV_FD + 1): */
#define FORKSRV_FD 198
/* Fork server init timeout multiplier: we'll wait the user-selected
timeout plus this much for the fork server to spin up. */
#define FORK_WAIT_MULT 10
/* Calibration timeout adjustments, to be a bit more generous when resuming
fuzzing sessions or trying to calibrate already-added internal finds.
The first value is a percentage, the other is in milliseconds: */
#define CAL_TMOUT_PERC 125
#define CAL_TMOUT_ADD 50
/* Number of chances to calibrate a case before giving up: */
#define CAL_CHANCES 3
/* Map size for the traced binary (2^MAP_SIZE_POW2). Must be greater than
2; you probably want to keep it under 18 or so for performance reasons
(adjusting AFL_INST_RATIO when compiling is probably a better way to solve
problems with complex programs). You need to recompile the target binary
after changing this - otherwise, SEGVs may ensue. */
#define MAP_SIZE_POW2 16
#define MAP_SIZE (1 << MAP_SIZE_POW2)
/* Maximum allocator request size (keep well under INT_MAX): */
#define MAX_ALLOC 0x40000000
/* A made-up hashing seed: */
#define HASH_CONST 0xa5b35705
/* Constants for afl-gotcpu to control busy loop timing: */
#define CTEST_TARGET_MS 5000
#define CTEST_CORE_TRG_MS 1000
#define CTEST_BUSY_CYCLES (10 * 1000 * 1000)
/* Uncomment this to use inferior block-coverage-based instrumentation. Note
that you need to recompile the target binary for this to have any effect: */
// #define COVERAGE_ONLY
/* Uncomment this to ignore hit counts and output just one bit per tuple.
As with the previous setting, you will need to recompile the target
binary: */
// #define SKIP_COUNTS
/* Uncomment this to use instrumentation data to record newly discovered paths,
but do not use them as seeds for fuzzing. This is useful for conveniently
measuring coverage that could be attained by a "dumb" fuzzing algorithm: */
// #define IGNORE_FINDS
#endif /* ! _HAVE_CONFIG_H */

@ -0,0 +1,258 @@
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - debug / error handling macros
--------------------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
*/
#ifndef _HAVE_DEBUG_H
#define _HAVE_DEBUG_H
#include <errno.h>
#include "types.h"
#include "config.h"
/*******************
* Terminal colors *
*******************/
#ifdef USE_COLOR
# define cBLK "\x1b[0;30m"
# define cRED "\x1b[0;31m"
# define cGRN "\x1b[0;32m"
# define cBRN "\x1b[0;33m"
# define cBLU "\x1b[0;34m"
# define cMGN "\x1b[0;35m"
# define cCYA "\x1b[0;36m"
# define cLGR "\x1b[0;37m"
# define cGRA "\x1b[1;90m"
# define cLRD "\x1b[1;91m"
# define cLGN "\x1b[1;92m"
# define cYEL "\x1b[1;93m"
# define cLBL "\x1b[1;94m"
# define cPIN "\x1b[1;95m"
# define cLCY "\x1b[1;96m"
# define cBRI "\x1b[1;97m"
# define cRST "\x1b[0m"
# define bgBLK "\x1b[40m"
# define bgRED "\x1b[41m"
# define bgGRN "\x1b[42m"
# define bgBRN "\x1b[43m"
# define bgBLU "\x1b[44m"
# define bgMGN "\x1b[45m"
# define bgCYA "\x1b[46m"
# define bgLGR "\x1b[47m"
# define bgGRA "\x1b[100m"
# define bgLRD "\x1b[101m"
# define bgLGN "\x1b[102m"
# define bgYEL "\x1b[103m"
# define bgLBL "\x1b[104m"
# define bgPIN "\x1b[105m"
# define bgLCY "\x1b[106m"
# define bgBRI "\x1b[107m"
#else
# define cBLK ""
# define cRED ""
# define cGRN ""
# define cBRN ""
# define cBLU ""
# define cMGN ""
# define cCYA ""
# define cLGR ""
# define cGRA ""
# define cLRD ""
# define cLGN ""
# define cYEL ""
# define cLBL ""
# define cPIN ""
# define cLCY ""
# define cBRI ""
# define cRST ""
# define bgBLK ""
# define bgRED ""
# define bgGRN ""
# define bgBRN ""
# define bgBLU ""
# define bgMGN ""
# define bgCYA ""
# define bgLGR ""
# define bgGRA ""
# define bgLRD ""
# define bgLGN ""
# define bgYEL ""
# define bgLBL ""
# define bgPIN ""
# define bgLCY ""
# define bgBRI ""
#endif /* ^USE_COLOR */
/*************************
* Box drawing sequences *
*************************/
#ifdef FANCY_BOXES
# define SET_G1 "\x1b)0" /* Set G1 for box drawing */
# define RESET_G1 "\x1b)B" /* Reset G1 to ASCII */
# define bSTART "\x0e" /* Enter G1 drawing mode */
# define bSTOP "\x0f" /* Leave G1 drawing mode */
# define bH "q" /* Horizontal line */
# define bV "x" /* Vertical line */
# define bLT "l" /* Left top corner */
# define bRT "k" /* Right top corner */
# define bLB "m" /* Left bottom corner */
# define bRB "j" /* Right bottom corner */
# define bX "n" /* Cross */
# define bVR "t" /* Vertical, branch right */
# define bVL "u" /* Vertical, branch left */
# define bHT "v" /* Horizontal, branch top */
# define bHB "w" /* Horizontal, branch bottom */
#else
# define SET_G1 ""
# define RESET_G1 ""
# define bSTART ""
# define bSTOP ""
# define bH "-"
# define bV "|"
# define bLT "+"
# define bRT "+"
# define bLB "+"
# define bRB "+"
# define bX "+"
# define bVR "+"
# define bVL "+"
# define bHT "+"
# define bHB "+"
#endif /* ^FANCY_BOXES */
/***********************
* Misc terminal codes *
***********************/
#define TERM_HOME "\x1b[H"
#define TERM_CLEAR TERM_HOME "\x1b[2J"
#define cEOL "\x1b[0K"
#define CURSOR_HIDE "\x1b[?25l"
#define CURSOR_SHOW "\x1b[?25h"
/************************
* Debug & error macros *
************************/
/* Just print stuff to the appropriate stream: printf-style output goes to
stdout when MESSAGES_TO_STDOUT is defined, and to stderr otherwise. */
#ifdef MESSAGES_TO_STDOUT
# define SAYF(x...) printf(x)
#else
# define SAYF(x...) fprintf(stderr, x)
#endif /* ^MESSAGES_TO_STDOUT */
/* Show a prefixed warning. */
#define WARNF(x...) do { \
SAYF(cYEL "[!] " cBRI "WARNING: " cRST x); \
SAYF(cRST "\n"); \
} while (0)
/* Show a prefixed "doing something" message. */
#define ACTF(x...) do { \
SAYF(cLBL "[*] " cRST x); \
SAYF(cRST "\n"); \
} while (0)
/* Show a prefixed "success" message. */
#define OKF(x...) do { \
SAYF(cLGN "[+] " cRST x); \
SAYF(cRST "\n"); \
} while (0)
/* Show a prefixed fatal error message (not used in afl). */
#define BADF(x...) do { \
SAYF(cLRD "\n[-] " cRST x); \
SAYF(cRST "\n"); \
} while (0)
/* Die with a verbose non-OS fatal error message. Leaves box-drawing mode,
re-shows the cursor, prints the message plus the calling function, file and
line, and terminates with exit(1). */
#define FATAL(x...) do { \
SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \
cBRI x); \
SAYF(cLRD "\n Location : " cRST "%s(), %s:%u\n\n", \
__FUNCTION__, __FILE__, __LINE__); \
exit(1); \
} while (0)
/* Die by calling abort() to provide a core dump. Prints the same kind of
report as FATAL() but terminates via abort() instead of exit(1). */
#define ABORT(x...) do { \
SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \
cBRI x); \
SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n\n", \
__FUNCTION__, __FILE__, __LINE__); \
abort(); \
} while (0)
/* Die while also including the OS error text for the current errno (obtained
with strerror()). stdout is flushed first; terminates with exit(1). */
#define PFATAL(x...) do { \
fflush(stdout); \
SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] SYSTEM ERROR : " \
cBRI x); \
SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n", \
__FUNCTION__, __FILE__, __LINE__); \
SAYF(cLRD " OS message : " cRST "%s\n", strerror(errno)); \
exit(1); \
} while (0)
/* Die with FATAL() or PFATAL() depending on the value of res (used to
   interpret different failure modes for read(), write(), etc). A negative
   res means the call itself failed, so the OS error is reported through
   PFATAL(); any other value (e.g. a short transfer) is reported through
   FATAL(). res is parenthesized so that compound expressions are compared
   against zero as a whole, and it is evaluated exactly once. */
#define RPFATAL(res, x...) do { \
    if ((res) < 0) PFATAL(x); else FATAL(x); \
  } while (0)
/* Error-checking version of write() that calls RPFATAL() as appropriate.
   Writes len bytes from buf to fd and bails out unless exactly len bytes
   were written; fn is only used to name the target in the error message.
   A negative return value is tested explicitly: comparing the signed result
   directly with the unsigned length would convert -1 to 0xFFFFFFFF and
   could make a failed call look like a complete transfer. */
#define ck_write(fd, buf, len, fn) do { \
    u32 _len = (len); \
    s32 _res = write((fd), (buf), _len); \
    if (_res < 0 || (u32)_res != _len) \
      RPFATAL(_res, "Short write to %s", (fn)); \
  } while (0)
/* Error-checking version of read() that calls RPFATAL() as appropriate.
   Reads len bytes from fd into buf and bails out unless exactly len bytes
   were read; fn is only used to name the source in the error message.
   A negative return value is tested explicitly: comparing the signed result
   directly with the unsigned length would convert -1 to 0xFFFFFFFF and
   could make a failed call look like a complete transfer. */
#define ck_read(fd, buf, len, fn) do { \
    u32 _len = (len); \
    s32 _res = read((fd), (buf), _len); \
    if (_res < 0 || (u32)_res != _len) \
      RPFATAL(_res, "Short read from %s", (fn)); \
  } while (0)
#endif /* ! _HAVE_DEBUG_H */

@ -0,0 +1,43 @@
================
AFL dictionaries
================
(See ../docs/README for the general instruction manual.)
This subdirectory contains a set of dictionaries that can be used in
conjunction with the -x option to allow the fuzzer to effortlessly explore the
grammar of some of the more verbose data formats or languages. The basic
principle behind the operation of fuzzer dictionaries is outlined in section 9
of the "main" README for the project.
Custom dictionaries can be added at will. They should consist of a
reasonably-sized set of rudimentary syntax units that the fuzzer will then try
to cobble together in various ways. Snippets between 2 and 16 bytes are usually
the sweet spot.
Custom dictionaries can be created in two ways:
- By creating a new directory and placing each token in a separate file, in
which case, there is no need to escape or otherwise format the data.
- By creating a flat text file where tokens are listed one per line in the
format of name="value". The alphanumeric name is ignored and can be omitted,
although it is a convenient way to document the meaning of a particular
token. The value must appear in quotes, with hex escaping (\xNN) applied to
all non-printable, high-bit, or otherwise problematic characters (\\ and \"
shorthands are recognized, too).
The fuzzer auto-selects the appropriate mode depending on whether the -x
parameter is a file or a directory.
In the file mode, every name field can be optionally followed by @<num>, e.g.:
keyword_foo@1 = "foo"
Such entries will be loaded only if the requested dictionary level is equal to
or higher than this number. The default level is zero; a higher value can be set
by appending @<num> to the dictionary file name, like so:
-x path/to/dictionary.dct@2
Good examples of dictionaries can be found in xml.dict and png.dict.

@ -0,0 +1,18 @@
#
# AFL dictionary for GIF images
# -----------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_87a="87a"
header_89a="89a"
header_gif="GIF"
marker_2c=","
marker_3b=";"
section_2101="!\x01\x12"
section_21f9="!\xf9\x04"
section_21fe="!\xfe"
section_21ff="!\xff\x11"

@ -0,0 +1,160 @@
#
# AFL dictionary for HTML parsers (tags only)
# -------------------------------------------
#
# A basic collection of HTML tags likely to matter to HTML parsers. Does *not*
# include any attributes or attribute values.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
tag_a="<a>"
tag_abbr="<abbr>"
tag_acronym="<acronym>"
tag_address="<address>"
tag_annotation_xml="<annotation-xml>"
tag_applet="<applet>"
tag_area="<area>"
tag_article="<article>"
tag_aside="<aside>"
tag_audio="<audio>"
tag_b="<b>"
tag_base="<base>"
tag_basefont="<basefont>"
tag_bdi="<bdi>"
tag_bdo="<bdo>"
tag_bgsound="<bgsound>"
tag_big="<big>"
tag_blink="<blink>"
tag_blockquote="<blockquote>"
tag_body="<body>"
tag_br="<br>"
tag_button="<button>"
tag_canvas="<canvas>"
tag_caption="<caption>"
tag_center="<center>"
tag_cite="<cite>"
tag_code="<code>"
tag_col="<col>"
tag_colgroup="<colgroup>"
tag_data="<data>"
tag_datalist="<datalist>"
tag_dd="<dd>"
tag_del="<del>"
tag_desc="<desc>"
tag_details="<details>"
tag_dfn="<dfn>"
tag_dir="<dir>"
tag_div="<div>"
tag_dl="<dl>"
tag_dt="<dt>"
tag_em="<em>"
tag_embed="<embed>"
tag_fieldset="<fieldset>"
tag_figcaption="<figcaption>"
tag_figure="<figure>"
tag_font="<font>"
tag_footer="<footer>"
tag_foreignobject="<foreignobject>"
tag_form="<form>"
tag_frame="<frame>"
tag_frameset="<frameset>"
tag_h1="<h1>"
tag_h2="<h2>"
tag_h3="<h3>"
tag_h4="<h4>"
tag_h5="<h5>"
tag_h6="<h6>"
tag_head="<head>"
tag_header="<header>"
tag_hgroup="<hgroup>"
tag_hr="<hr>"
tag_html="<html>"
tag_i="<i>"
tag_iframe="<iframe>"
tag_image="<image>"
tag_img="<img>"
tag_input="<input>"
tag_ins="<ins>"
tag_isindex="<isindex>"
tag_kbd="<kbd>"
tag_keygen="<keygen>"
tag_label="<label>"
tag_legend="<legend>"
tag_li="<li>"
tag_link="<link>"
tag_listing="<listing>"
tag_main="<main>"
tag_malignmark="<malignmark>"
tag_map="<map>"
tag_mark="<mark>"
tag_marquee="<marquee>"
tag_math="<math>"
tag_menu="<menu>"
tag_menuitem="<menuitem>"
tag_meta="<meta>"
tag_meter="<meter>"
tag_mglyph="<mglyph>"
tag_mi="<mi>"
tag_mn="<mn>"
tag_mo="<mo>"
tag_ms="<ms>"
tag_mtext="<mtext>"
tag_multicol="<multicol>"
tag_nav="<nav>"
tag_nextid="<nextid>"
tag_nobr="<nobr>"
tag_noembed="<noembed>"
tag_noframes="<noframes>"
tag_noscript="<noscript>"
tag_object="<object>"
tag_ol="<ol>"
tag_optgroup="<optgroup>"
tag_option="<option>"
tag_output="<output>"
tag_p="<p>"
tag_param="<param>"
tag_plaintext="<plaintext>"
tag_pre="<pre>"
tag_progress="<progress>"
tag_q="<q>"
tag_rb="<rb>"
tag_rp="<rp>"
tag_rt="<rt>"
tag_rtc="<rtc>"
tag_ruby="<ruby>"
tag_s="<s>"
tag_samp="<samp>"
tag_script="<script>"
tag_section="<section>"
tag_select="<select>"
tag_small="<small>"
tag_source="<source>"
tag_spacer="<spacer>"
tag_span="<span>"
tag_strike="<strike>"
tag_strong="<strong>"
tag_style="<style>"
tag_sub="<sub>"
tag_summary="<summary>"
tag_sup="<sup>"
tag_svg="<svg>"
tag_table="<table>"
tag_tbody="<tbody>"
tag_td="<td>"
tag_template="<template>"
tag_textarea="<textarea>"
tag_tfoot="<tfoot>"
tag_th="<th>"
tag_thead="<thead>"
tag_time="<time>"
tag_title="<title>"
tag_tr="<tr>"
tag_track="<track>"
tag_tt="<tt>"
tag_u="<u>"
tag_ul="<ul>"
tag_var="<var>"
tag_video="<video>"
tag_wbr="<wbr>"
tag_xmp="<xmp>"

@ -0,0 +1,22 @@
#
# AFL dictionary for JPEG images
# ------------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_jfif="JFIF\x00"
header_jfxx="JFXX\x00"
section_ffc0="\xff\xc0"
section_ffc2="\xff\xc2"
section_ffc4="\xff\xc4"
section_ffd0="\xff\xd0"
section_ffd8="\xff\xd8"
section_ffd9="\xff\xd9"
section_ffda="\xff\xda"
section_ffdb="\xff\xdb"
section_ffdd="\xff\xdd"
section_ffe0="\xff\xe0"
section_ffe1="\xff\xe1"
section_fffe="\xff\xfe"

@ -0,0 +1,107 @@
#
# AFL dictionary for JavaScript
# -----------------------------
#
# Contains basic reserved keywords and syntax building blocks.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
keyword_arguments="arguments"
keyword_break="break"
keyword_case="case"
keyword_catch="catch"
keyword_const="const"
keyword_continue="continue"
keyword_debugger="debugger"
keyword_decodeURI="decodeURI"
keyword_default="default"
keyword_delete="delete"
keyword_do="do"
keyword_else="else"
keyword_escape="escape"
keyword_eval="eval"
keyword_export="export"
keyword_finally="finally"
keyword_for="for (a=0;a<2;a++)"
keyword_function="function"
keyword_if="if"
keyword_in="in"
keyword_instanceof="instanceof"
keyword_isNaN="isNaN"
keyword_let="let"
keyword_new="new"
keyword_parseInt="parseInt"
keyword_return="return"
keyword_switch="switch"
keyword_this="this"
keyword_throw="throw"
keyword_try="try"
keyword_typeof="typeof"
keyword_var="var"
keyword_void="void"
keyword_while="while"
keyword_with="with"
misc_1=" 1"
misc_a="a"
misc_array=" [1]"
misc_assign=" a=1"
misc_code_block=" {1}"
misc_colon_num=" 1:"
misc_colon_string=" 'a':"
misc_comma=" ,"
misc_comment_block=" /* */"
misc_comment_line=" //"
misc_cond=" 1?2:3"
misc_dec=" --"
misc_div=" /"
misc_equals=" ="
misc_fn=" a()"
misc_identical=" ==="
misc_inc=" ++"
misc_minus=" -"
misc_modulo=" %"
misc_parentheses=" ()"
misc_parentheses_1=" (1)"
misc_parentheses_1x4=" (1,1,1,1)"
misc_parentheses_a=" (a)"
misc_period="."
misc_plus=" +"
misc_plus_assign=" +="
misc_regex=" /a/g"
misc_rol=" <<<"
misc_semicolon=" ;"
misc_serialized_object=" {'a': 1}"
misc_string=" 'a'"
misc_unicode=" '\\u0001'"
object_Array=" Array"
object_Boolean=" Boolean"
object_Date=" Date"
object_Function=" Function"
object_Infinity=" Infinity"
object_Int8Array=" Int8Array"
object_Math=" Math"
object_NaN=" NaN"
object_Number=" Number"
object_Object=" Object"
object_RegExp=" RegExp"
object_String=" String"
object_Symbol=" Symbol"
object_false=" false"
object_null=" null"
object_true=" true"
prop_charAt=".charAt"
prop_concat=".concat"
prop_constructor=".constructor"
prop_destructor=".destructor"
prop_length=".length"
prop_match=".match"
prop_proto=".__proto__"
prop_prototype=".prototype"
prop_slice=".slice"
prop_toCode=".toCode"
prop_toString=".toString"
prop_valueOf=".valueOf"

@ -0,0 +1,52 @@
#
# AFL dictionary for JSON
# -----------------------
#
# Just the very basics.
#
# Inspired by a dictionary by Jakub Wilk <jwilk@jwilk.net>
#
"0"
",0"
":0"
"0:"
"-1.2e+3"
"true"
"false"
"null"
"\"\""
",\"\""
":\"\""
"\"\":"
"{}"
",{}"
":{}"
"{\"\":0}"
"{{}}"
"[]"
",[]"
":[]"
"[0]"
"[[]]"
"''"
"\\"
"\\b"
"\\f"
"\\n"
"\\r"
"\\t"
"\\u0000"
"\\x00"
"\\0"
"\\uD800\\uDC00"
"\\uDBFF\\uDFFF"
"\"\":0"
"//"
"/**/"

File diff suppressed because it is too large Load Diff

@ -0,0 +1,16 @@
#
# AFL dictionary for fuzzing Perl
# --------------------------------
#
# Created by @RandomDhiraj
#
"<:crlf"
"fwrite()"
"fread()"
":raw:utf8"
":raw:eol(LF)"
"Perl_invert()"
":raw:eol(CRLF)"
"Perl_PerlIO_eof()"

@ -0,0 +1,38 @@
#
# AFL dictionary for PNG images
# -----------------------------
#
# Just the basic, standard-originating sections; does not include vendor
# extensions.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_png="\x89PNG\x0d\x0a\x1a\x0a"
section_IDAT="IDAT"
section_IEND="IEND"
section_IHDR="IHDR"
section_PLTE="PLTE"
section_bKGD="bKGD"
section_cHRM="cHRM"
section_fRAc="fRAc"
section_gAMA="gAMA"
section_gIFg="gIFg"
section_gIFt="gIFt"
section_gIFx="gIFx"
section_hIST="hIST"
section_iCCP="iCCP"
section_iTXt="iTXt"
section_oFFs="oFFs"
section_pCAL="pCAL"
section_pHYs="pHYs"
section_sBIT="sBIT"
section_sCAL="sCAL"
section_sPLT="sPLT"
section_sRGB="sRGB"
section_sTER="sTER"
section_tEXt="tEXt"
section_tIME="tIME"
section_tRNS="tRNS"
section_zTXt="zTXt"

@ -0,0 +1,254 @@
#
# AFL dictionary for JS regex
# ---------------------------
#
# Contains various regular expressions.
#
# Created by Yang Guo <yangguo@chromium.org>
#
"?"
"abc"
"()"
"[]"
"abc|def"
"abc|def|ghi"
"^xxx$"
"ab\\b\\d\\bcd"
"\\w|\\d"
"a*?"
"abc+"
"abc+?"
"xyz?"
"xyz??"
"xyz{0,1}"
"xyz{0,1}?"
"xyz{93}"
"xyz{1,32}"
"xyz{1,32}?"
"xyz{1,}"
"xyz{1,}?"
"a\\fb\\nc\\rd\\te\\vf"
"a\\nb\\bc"
"(?:foo)"
"(?: foo )"
"foo|(bar|baz)|quux"
"foo(?=bar)baz"
"foo(?!bar)baz"
"foo(?<=bar)baz"
"foo(?<!bar)baz"
"()"
"(?=)"
"[]"
"[x]"
"[xyz]"
"[a-zA-Z0-9]"
"[-123]"
"[^123]"
"]"
"}"
"[a-b-c]"
"[x\\dz]"
"[\\d-z]"
"[\\d-\\d]"
"[z-\\d]"
"\\cj\\cJ\\ci\\cI\\ck\\cK"
"\\c!"
"\\c_"
"\\c~"
"[\\c!]"
"[\\c_]"
"[\\c~]"
"[\\ca]"
"[\\cz]"
"[\\cA]"
"[\\cZ]"
"[\\c1]"
"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
"\\8"
"\\9"
"\\11"
"\\11a"
"\\011"
"\\118"
"\\111"
"\\1111"
"(x)(x)(x)\\1"
"(x)(x)(x)\\2"
"(x)(x)(x)\\3"
"(x)(x)(x)\\4"
"(x)(x)(x)\\1*"
"(x)(x)(x)\\3*"
"(x)(x)(x)\\4*"
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
"(a)\\1"
"(a\\1)"
"(\\1a)"
"(\\2)(\\1)"
"(?=a){0,10}a"
"(?=a){1,10}a"
"(?=a){9,10}a"
"(?!a)?a"
"\\1(a)"
"(?!(a))\\1"
"(?!\\1(a\\1)\\1)\\1"
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
"[\\0]"
"[\\11]"
"[\\11a]"
"[\\011]"
"[\\00011]"
"[\\118]"
"[\\111]"
"[\\1111]"
"\\x60"
"\\x3z"
"\\c"
"\\u0034"
"\\u003z"
"foo[z]*"
"\\u{12345}"
"\\u{12345}\\u{23456}"
"\\u{12345}{3}"
"\\u{12345}*"
"\\ud808\\udf45*"
"[\\ud808\\udf45-\\ud809\\udccc]"
"a"
"a|b"
"a\\n"
"a$"
"a\\b!"
"a\\Bb"
"a*?"
"a?"
"a??"
"a{0,1}?"
"a{1,2}?"
"a+?"
"(a)"
"(a)\\1"
"(\\1a)"
"\\1(a)"
"a\\s"
"a\\S"
"a\\D"
"a\\w"
"a\\W"
"a."
"a\\q"
"a[a]"
"a[^a]"
"a[a-z]"
"a(?:b)"
"a(?=b)"
"a(?!b)"
"\\x60"
"\\u0060"
"\\cA"
"\\q"
"\\1112"
"(a)\\1"
"(?!a)?a\\1"
"(?:(?=a))a\\1"
"a{}"
"a{,}"
"a{"
"a{z}"
"a{12z}"
"a{12,"
"a{12,3b"
"{}"
"{,}"
"{"
"{z}"
"{1z}"
"{12,"
"{12,3b"
"a"
"abc"
"a[bc]d"
"a|bc"
"ab|c"
"a||bc"
"(?:ab)"
"(?:ab|cde)"
"(?:ab)|cde"
"(ab)"
"(ab|cde)"
"(ab)\\1"
"(ab|cde)\\1"
"(?:ab)?"
"(?:ab)+"
"a?"
"a+"
"a??"
"a*?"
"a+?"
"(?:a?)?"
"(?:a+)?"
"(?:a?)+"
"(?:a*)+"
"(?:a+)+"
"(?:a?)*"
"(?:a*)*"
"(?:a+)*"
"a{0}"
"(?:a+){0,0}"
"a*b"
"a+b"
"a*b|c"
"a+b|c"
"(?:a{5,1000000}){3,1000000}"
"(?:ab){4,7}"
"a\\bc"
"a\\sc"
"a\\Sc"
"a(?=b)c"
"a(?=bbb|bb)c"
"a(?!bbb|bb)c"
"\xe2\x81\xa3"
"[\xe2\x81\xa3]"
"\xed\xb0\x80"
"\xed\xa0\x80"
"(\xed\xb0\x80)\x01"
"((\xed\xa0\x80))\x02"
"\xf0\x9f\x92\xa9"
"\x01"
"\x0f"
"[-\xf0\x9f\x92\xa9]+"
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
"(?<=)"
"(?<=a)"
"(?<!)"
"(?<!a)"
"(?<a>)"
"(?<a>.)"
"(?<a>.)\\k<a>"
"\\p{Script=Greek}"
"\\P{sc=Greek}"
"\\p{Script_Extensions=Greek}"
"\\P{scx=Greek}"
"\\p{General_Category=Decimal_Number}"
"\\P{gc=Decimal_Number}"
"\\p{gc=Nd}"
"\\P{Decimal_Number}"
"\\p{Nd}"
"\\P{Any}"
"\\p{Changes_When_NFKC_Casefolded}"
"(?:a?)??"
"a?)"xyz{93}"
"{93}"
"a{12za?)?"
"[\x8f]"
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\x92\xa9-\xf4\x8f\xbf\xbf]"
"[\x92\xa9-\xf4\x8f\xbf\xbf]"
"\\1\\2(b\\1\\2))\\2)\\1"
"\\1\\2(a(?:\\1\\2))\\2)\\1"
"?:\\1"
"\\1(b\\1\\2))\\2)\\1"
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
"foo(?=bar)bar)baz"
"fo(?o(?o(?o(?=bar)baz"
"foo(?=bar)baz"
"foo(?=bar)bar)az"

@ -0,0 +1,282 @@
#
# AFL dictionary for SQL
# ----------------------
#
# Modeled based on SQLite documentation, contains some number of SQLite
# extensions. Other dialects of SQL may benefit from customized dictionaries.
#
# If you append @1 to the file name when loading this dictionary, afl-fuzz
# will also additionally load a selection of pragma keywords that are very
# specific to SQLite (and are probably less interesting from the security
# standpoint, because they are usually not allowed in non-privileged
# contexts).
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
function_abs=" abs(1)"
function_avg=" avg(1)"
function_changes=" changes()"
function_char=" char(1)"
function_coalesce=" coalesce(1,1)"
function_count=" count(1)"
function_date=" date(1,1,1)"
function_datetime=" datetime(1,1,1)"
function_decimal=" decimal(1,1)"
function_glob=" glob(1,1)"
function_group_concat=" group_concat(1,1)"
function_hex=" hex(1)"
function_ifnull=" ifnull(1,1)"
function_instr=" instr(1,1)"
function_julianday=" julianday(1,1,1)"
function_last_insert_rowid=" last_insert_rowid()"
function_length=" length(1)"
function_like=" like(1,1)"
function_likelihood=" likelihood(1,1)"
function_likely=" likely(1)"
function_load_extension=" load_extension(1,1)"
function_lower=" lower(1)"
function_ltrim=" ltrim(1,1)"
function_max=" max(1,1)"
function_min=" min(1,1)"
function_nullif=" nullif(1,1)"
function_printf=" printf(1,1)"
function_quote=" quote(1)"
function_random=" random()"
function_randomblob=" randomblob(1)"
function_replace=" replace(1,1,1)"
function_round=" round(1,1)"
function_rtrim=" rtrim(1,1)"
function_soundex=" soundex(1)"
function_sqlite_compileoption_get=" sqlite_compileoption_get(1)"
function_sqlite_compileoption_used=" sqlite_compileoption_used(1)"
function_sqlite_source_id=" sqlite_source_id()"
function_sqlite_version=" sqlite_version()"
function_strftime=" strftime(1,1,1,1)"
function_substr=" substr(1,1,1)"
function_sum=" sum(1)"
function_time=" time(1,1,1)"
function_total=" total(1)"
function_total_changes=" total_changes()"
function_trim=" trim(1,1)"
function_typeof=" typeof(1)"
function_unicode=" unicode(1)"
function_unlikely=" unlikely(1)"
function_upper=" upper(1)"
function_varchar=" varchar(1)"
function_zeroblob=" zeroblob(1)"
keyword_ABORT="ABORT"
keyword_ACTION="ACTION"
keyword_ADD="ADD"
keyword_AFTER="AFTER"
keyword_ALL="ALL"
keyword_ALTER="ALTER"
keyword_ANALYZE="ANALYZE"
keyword_AND="AND"
keyword_AS="AS"
keyword_ASC="ASC"
keyword_ATTACH="ATTACH"
keyword_AUTOINCREMENT="AUTOINCREMENT"
keyword_BEFORE="BEFORE"
keyword_BEGIN="BEGIN"
keyword_BETWEEN="BETWEEN"
keyword_BY="BY"
keyword_CASCADE="CASCADE"
keyword_CASE="CASE"
keyword_CAST="CAST"
keyword_CHECK="CHECK"
keyword_COLLATE="COLLATE"
keyword_COLUMN="COLUMN"
keyword_COMMIT="COMMIT"
keyword_CONFLICT="CONFLICT"
keyword_CONSTRAINT="CONSTRAINT"
keyword_CREATE="CREATE"
keyword_CROSS="CROSS"
keyword_CURRENT_DATE="CURRENT_DATE"
keyword_CURRENT_TIME="CURRENT_TIME"
keyword_CURRENT_TIMESTAMP="CURRENT_TIMESTAMP"
keyword_DATABASE="DATABASE"
keyword_DEFAULT="DEFAULT"
keyword_DEFERRABLE="DEFERRABLE"
keyword_DEFERRED="DEFERRED"
keyword_DELETE="DELETE"
keyword_DESC="DESC"
keyword_DETACH="DETACH"
keyword_DISTINCT="DISTINCT"
keyword_DROP="DROP"
keyword_EACH="EACH"
keyword_ELSE="ELSE"
keyword_END="END"
keyword_ESCAPE="ESCAPE"
keyword_EXCEPT="EXCEPT"
keyword_EXCLUSIVE="EXCLUSIVE"
keyword_EXISTS="EXISTS"
keyword_EXPLAIN="EXPLAIN"
keyword_FAIL="FAIL"
keyword_FOR="FOR"
keyword_FOREIGN="FOREIGN"
keyword_FROM="FROM"
keyword_FULL="FULL"
keyword_GLOB="GLOB"
keyword_GROUP="GROUP"
keyword_HAVING="HAVING"
keyword_IF="IF"
keyword_IGNORE="IGNORE"
keyword_IMMEDIATE="IMMEDIATE"
keyword_IN="IN"
keyword_INDEX="INDEX"
keyword_INDEXED="INDEXED"
keyword_INITIALLY="INITIALLY"
keyword_INNER="INNER"
keyword_INSERT="INSERT"
keyword_INSTEAD="INSTEAD"
keyword_INTERSECT="INTERSECT"
keyword_INTO="INTO"
keyword_IS="IS"
keyword_ISNULL="ISNULL"
keyword_JOIN="JOIN"
keyword_KEY="KEY"
keyword_LEFT="LEFT"
keyword_LIKE="LIKE"
keyword_LIMIT="LIMIT"
keyword_MATCH="MATCH"
keyword_NATURAL="NATURAL"
keyword_NO="NO"
keyword_NOT="NOT"
keyword_NOTNULL="NOTNULL"
keyword_NULL="NULL"
keyword_OF="OF"
keyword_OFFSET="OFFSET"
keyword_ON="ON"
keyword_OR="OR"
keyword_ORDER="ORDER"
keyword_OUTER="OUTER"
keyword_PLAN="PLAN"
keyword_PRAGMA="PRAGMA"
keyword_PRIMARY="PRIMARY"
keyword_QUERY="QUERY"
keyword_RAISE="RAISE"
keyword_RECURSIVE="RECURSIVE"
keyword_REFERENCES="REFERENCES"
keyword_REGEXP="REGEXP"
keyword_REINDEX="REINDEX"
keyword_RELEASE="RELEASE"
keyword_RENAME="RENAME"
keyword_REPLACE="REPLACE"
keyword_RESTRICT="RESTRICT"
keyword_RIGHT="RIGHT"
keyword_ROLLBACK="ROLLBACK"
keyword_ROW="ROW"
keyword_SAVEPOINT="SAVEPOINT"
keyword_SELECT="SELECT"
keyword_SET="SET"
keyword_TABLE="TABLE"
keyword_TEMP="TEMP"
keyword_TEMPORARY="TEMPORARY"
keyword_THEN="THEN"
keyword_TO="TO"
keyword_TRANSACTION="TRANSACTION"
keyword_TRIGGER="TRIGGER"
keyword_UNION="UNION"
keyword_UNIQUE="UNIQUE"
keyword_UPDATE="UPDATE"
keyword_USING="USING"
keyword_VACUUM="VACUUM"
keyword_VALUES="VALUES"
keyword_VIEW="VIEW"
keyword_VIRTUAL="VIRTUAL"
keyword_WHEN="WHEN"
keyword_WHERE="WHERE"
keyword_WITH="WITH"
keyword_WITHOUT="WITHOUT"
operator_concat=" || "
operator_above_eq=" >="
snippet_1eq1=" 1=1"
snippet_at=" @1"
snippet_backticks=" `a`"
snippet_blob=" blob"
snippet_brackets=" [a]"
snippet_colon=" :1"
snippet_comment=" /* */"
snippet_date="2001-01-01"
snippet_dollar=" $1"
snippet_dotref=" a.b"
snippet_fmtY="%Y"
snippet_int=" int"
snippet_neg1=" -1"
snippet_pair=" a,b"
snippet_parentheses=" (1)"
snippet_plus2days="+2 days"
snippet_qmark=" ?1"
snippet_semicolon=" ;"
snippet_star=" *"
snippet_string_pair=" \"a\",\"b\""
string_dbl_q=" \"a\""
string_escaped_q=" 'a''b'"
string_single_q=" 'a'"
pragma_application_id@1=" application_id"
pragma_auto_vacuum@1=" auto_vacuum"
pragma_automatic_index@1=" automatic_index"
pragma_busy_timeout@1=" busy_timeout"
pragma_cache_size@1=" cache_size"
pragma_cache_spill@1=" cache_spill"
pragma_case_sensitive_like@1=" case_sensitive_like"
pragma_checkpoint_fullfsync@1=" checkpoint_fullfsync"
pragma_collation_list@1=" collation_list"
pragma_compile_options@1=" compile_options"
pragma_count_changes@1=" count_changes"
pragma_data_store_directory@1=" data_store_directory"
pragma_database_list@1=" database_list"
pragma_default_cache_size@1=" default_cache_size"
pragma_defer_foreign_keys@1=" defer_foreign_keys"
pragma_empty_result_callbacks@1=" empty_result_callbacks"
pragma_encoding@1=" encoding"
pragma_foreign_key_check@1=" foreign_key_check"
pragma_foreign_key_list@1=" foreign_key_list"
pragma_foreign_keys@1=" foreign_keys"
pragma_freelist_count@1=" freelist_count"
pragma_full_column_names@1=" full_column_names"
pragma_fullfsync@1=" fullfsync"
pragma_ignore_check_constraints@1=" ignore_check_constraints"
pragma_incremental_vacuum@1=" incremental_vacuum"
pragma_index_info@1=" index_info"
pragma_index_list@1=" index_list"
pragma_integrity_check@1=" integrity_check"
pragma_journal_mode@1=" journal_mode"
pragma_journal_size_limit@1=" journal_size_limit"
pragma_legacy_file_format@1=" legacy_file_format"
pragma_locking_mode@1=" locking_mode"
pragma_max_page_count@1=" max_page_count"
pragma_mmap_size@1=" mmap_size"
pragma_page_count@1=" page_count"
pragma_page_size@1=" page_size"
pragma_parser_trace@1=" parser_trace"
pragma_query_only@1=" query_only"
pragma_quick_check@1=" quick_check"
pragma_read_uncommitted@1=" read_uncommitted"
pragma_recursive_triggers@1=" recursive_triggers"
pragma_reverse_unordered_selects@1=" reverse_unordered_selects"
pragma_schema_version@1=" schema_version"
pragma_secure_delete@1=" secure_delete"
pragma_short_column_names@1=" short_column_names"
pragma_shrink_memory@1=" shrink_memory"
pragma_soft_heap_limit@1=" soft_heap_limit"
pragma_stats@1=" stats"
pragma_synchronous@1=" synchronous"
pragma_table_info@1=" table_info"
pragma_temp_store@1=" temp_store"
pragma_temp_store_directory@1=" temp_store_directory"
pragma_threads@1=" threads"
pragma_user_version@1=" user_version"
pragma_vdbe_addoptrace@1=" vdbe_addoptrace"
pragma_vdbe_debug@1=" vdbe_debug"
pragma_vdbe_listing@1=" vdbe_listing"
pragma_vdbe_trace@1=" vdbe_trace"
pragma_wal_autocheckpoint@1=" wal_autocheckpoint"
pragma_wal_checkpoint@1=" wal_checkpoint"
pragma_writable_schema@1=" writable_schema"

@ -0,0 +1,51 @@
#
# AFL dictionary for TIFF images
# ------------------------------
#
# Just the basic, standard-originating sections; does not include vendor
# extensions.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_ii="II*\x00"
header_mm="MM\x00*"
section_100="\x00\x01"
section_101="\x01\x01"
section_102="\x02\x01"
section_103="\x03\x01"
section_106="\x06\x01"
section_107="\x07\x01"
section_10D="\x0d\x01"
section_10E="\x0e\x01"
section_10F="\x0f\x01"
section_110="\x10\x01"
section_111="\x11\x01"
section_112="\x12\x01"
section_115="\x15\x01"
section_116="\x16\x01"
section_117="\x17\x01"
section_11A="\x1a\x01"
section_11B="\x1b\x01"
section_11C="\x1c\x01"
section_11D="\x1d\x01"
section_11E="\x1e\x01"
section_11F="\x1f\x01"
section_122="\"\x01"
section_123="#\x01"
section_124="$\x01"
section_125="%\x01"
section_128="(\x01"
section_129=")\x01"
section_12D="-\x01"
section_131="1\x01"
section_132="2\x01"
section_13B=";\x01"
section_13C="<\x01"
section_13D="=\x01"
section_13E=">\x01"
section_13F="?\x01"
section_140="@\x01"
section_FE="\xfe\x00"
section_FF="\xff\x00"

@ -0,0 +1,20 @@
#
# AFL dictionary for WebP images
# ------------------------------
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
header_RIFF="RIFF"
header_WEBP="WEBP"
section_ALPH="ALPH"
section_ANIM="ANIM"
section_ANMF="ANMF"
section_EXIF="EXIF"
section_FRGM="FRGM"
section_ICCP="ICCP"
section_VP8="VP8 "
section_VP8L="VP8L"
section_VP8X="VP8X"
section_XMP="XMP "

@ -0,0 +1,72 @@
#
# AFL dictionary for XML
# ----------------------
#
# Several basic syntax elements and attributes, modeled on libxml2.
#
# Created by Michal Zalewski <lcamtuf@google.com>
#
attr_encoding=" encoding=\"1\""
attr_generic=" a=\"1\""
attr_href=" href=\"1\""
attr_standalone=" standalone=\"no\""
attr_version=" version=\"1\""
attr_xml_base=" xml:base=\"1\""
attr_xml_id=" xml:id=\"1\""
attr_xml_lang=" xml:lang=\"1\""
attr_xml_space=" xml:space=\"1\""
attr_xmlns=" xmlns=\"1\""
entity_builtin="&lt;"
entity_decimal="&#1;"
entity_external="&a;"
entity_hex="&#x1;"
string_any="ANY"
string_brackets="[]"
string_cdata="CDATA"
string_col_fallback=":fallback"
string_col_generic=":a"
string_col_include=":include"
string_dashes="--"
string_empty="EMPTY"
string_empty_dblquotes="\"\""
string_empty_quotes="''"
string_entities="ENTITIES"
string_entity="ENTITY"
string_fixed="#FIXED"
string_id="ID"
string_idref="IDREF"
string_idrefs="IDREFS"
string_implied="#IMPLIED"
string_nmtoken="NMTOKEN"
string_nmtokens="NMTOKENS"
string_notation="NOTATION"
string_parentheses="()"
string_pcdata="#PCDATA"
string_percent="%a"
string_public="PUBLIC"
string_required="#REQUIRED"
string_schema=":schema"
string_system="SYSTEM"
string_ucs4="UCS-4"
string_utf16="UTF-16"
string_utf8="UTF-8"
string_xmlns="xmlns:"
tag_attlist="<!ATTLIST"
tag_cdata="<![CDATA["
tag_close="</a>"
tag_doctype="<!DOCTYPE"
tag_element="<!ELEMENT"
tag_entity="<!ENTITY"
tag_ignore="<![IGNORE["
tag_include="<![INCLUDE["
tag_notation="<!NOTATION"
tag_open="<a>"
tag_open_close="<a />"
tag_open_exclamation="<!"
tag_open_q="<?"
tag_sq2_close="]]>"
tag_xml_q="<?xml?>"

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,183 @@
=========================
Installation instructions
=========================
This document provides basic installation instructions and discusses known
issues for a variety of platforms. See README for the general instruction
manual.
1) Linux on x86
---------------
This platform is expected to work well. Compile the program with:
$ make
You can start using the fuzzer without installation, but it is also possible to
install it with:
# make install
There are no special dependencies to speak of; you will need GNU make and a
working compiler (gcc or clang). Some of the optional scripts bundled with the
program may depend on bash, gdb, and similar basic tools.
If you are using clang, please review llvm_mode/README.llvm; the LLVM
integration mode can offer substantial performance gains compared to the
traditional approach.
You may have to change several settings to get optimal results (most notably,
disable crash reporting utilities and switch to a different CPU governor), but
afl-fuzz will guide you through that if necessary.
2) OpenBSD, FreeBSD, NetBSD on x86
----------------------------------
Similarly to Linux, these platforms are expected to work well and are
regularly tested. Compile everything with GNU make:
$ gmake
Note that BSD make will *not* work; if you do not have gmake on your system,
please install it first. As on Linux, you can use the fuzzer itself without
installation, or install it with:
# gmake install
Keep in mind that if you are using csh as your shell, the syntax of some of the
shell commands given in the README and other docs will be different.
The llvm_mode requires a dynamically linked, fully-operational installation of
clang. At least on FreeBSD, the clang binaries are static and do not include
some of the essential tools, so if you want to make it work, you may need to
follow the instructions in llvm_mode/README.llvm.
Beyond that, everything should work as advertised.
The QEMU mode is currently supported only on Linux. I think it's just a QEMU
problem; I couldn't get a vanilla copy of user-mode emulation support working
correctly on BSD at all.
3) MacOS X on x86
-----------------
MacOS X should work, but there are some gotchas due to the idiosyncrasies of
the platform. On top of this, I have limited release testing capabilities
and depend mostly on user feedback.
To build AFL, install Xcode and follow the general instructions for Linux.
The Xcode 'gcc' tool is just a wrapper for clang, so be sure to use afl-clang
to compile any instrumented binaries; afl-gcc will fail unless you have GCC
installed from another source (in which case, please specify AFL_CC and
AFL_CXX to point to the "real" GCC binaries).
Only 64-bit compilation will work on the platform; porting the 32-bit
instrumentation would require a fair amount of work due to the way OS X
handles relocations, and today, virtually all MacOS X boxes are 64-bit.
The crash reporting daemon that comes by default with MacOS X will cause
problems with fuzzing. You need to turn it off by following the instructions
provided here: http://goo.gl/CCcd5u
The fork() semantics on OS X are a bit unusual compared to other unix systems
and definitely don't look POSIX-compliant. This means two things:
- Fuzzing will probably be slower than on Linux. In fact, some folks report
considerable performance gains by running the jobs inside a Linux VM on
MacOS X.
- Some non-portable, platform-specific code may be incompatible with the
AFL forkserver. If you run into any problems, set AFL_NO_FORKSRV=1 in the
environment before starting afl-fuzz.
User emulation mode of QEMU does not appear to be supported on MacOS X, so
black-box instrumentation mode (-Q) will not work.
The llvm_mode requires a fully-operational installation of clang. The one that
comes with Xcode is missing some of the essential headers and helper tools.
See llvm_mode/README.llvm for advice on how to build the compiler from scratch.
4) Linux or *BSD on non-x86 systems
-----------------------------------
Standard build will fail on non-x86 systems, but you should be able to
leverage two other options:
- The LLVM mode (see llvm_mode/README.llvm), which does not rely on
x86-specific assembly shims. It's fast and robust, but requires a
complete installation of clang.
- The QEMU mode (see qemu_mode/README.qemu), which can also be used for
fuzzing cross-platform binaries. It's slower and more fragile, but
can be used even when you don't have the source for the tested app.
If you're not sure what you need, you need the LLVM mode. To get it, try:
$ AFL_NO_X86=1 gmake && gmake -C llvm_mode
...and compile your target program with afl-clang-fast or afl-clang-fast++
instead of the traditional afl-gcc or afl-clang wrappers.
5) Solaris on x86
-----------------
The fuzzer reportedly works on Solaris, but I have not tested this first-hand,
and the user base is fairly small, so I don't have a lot of feedback.
To get the ball rolling, you will need to use GNU make and GCC or clang. I'm
being told that the stock version of GCC that comes with the platform does not
work properly due to its reliance on a hardcoded location for 'as' (completely
ignoring the -B parameter or $PATH).
To fix this, you may want to build stock GCC from the source, like so:
$ ./configure --prefix=$HOME/gcc --with-gnu-as --with-gnu-ld \
--with-gmp-include=/usr/include/gmp --with-mpfr-include=/usr/include/mpfr
$ make
$ sudo make install
Do *not* specify --with-as=/usr/gnu/bin/as - this will produce a GCC binary that
ignores the -B flag and you will be back to square one.
Note that Solaris reportedly comes with crash reporting enabled, which causes
problems with crashes being misinterpreted as hangs, similarly to the gotchas
for Linux and MacOS X. AFL does not auto-detect crash reporting on this
particular platform, so you may need to run the following command:
$ coreadm -d global -d global-setid -d process -d proc-setid \
-d kzone -d log
User emulation mode of QEMU is not available on Solaris, so black-box
instrumentation mode (-Q) will not work.
6) Everything else
------------------
You're on your own. On POSIX-compliant systems, you may be able to compile and
run the fuzzer; and the LLVM mode may offer a way to instrument non-x86 code.
The fuzzer will not run on Windows. It will also not work under Cygwin. It
could be ported to the latter platform fairly easily, but it's a pretty bad
idea, because Cygwin is extremely slow. It makes much more sense to use
VirtualBox or so to run a hardware-accelerated Linux VM; it will run around
20x faster or so. If you have a *really* compelling use case for Cygwin, let
me know.
Although Android on x86 should theoretically work, the stock kernel may have
SHM support compiled out, and if so, you may have to address that issue first.
It's possible that all you need is this workaround:
https://github.com/pelya/android-shmem
Joshua J. Drake notes that the Android linker adds a shim that automatically
intercepts SIGSEGV and related signals. To fix this issue and be able to see
crashes, you need to put this at the beginning of the fuzzed program:
signal(SIGILL, SIG_DFL);
signal(SIGABRT, SIG_DFL);
signal(SIGBUS, SIG_DFL);
signal(SIGFPE, SIG_DFL);
signal(SIGSEGV, SIG_DFL);
You may need to #include <signal.h> first.

@ -0,0 +1,49 @@
=====================
AFL quick start guide
=====================
You should read docs/README. It's pretty short. If you really can't, here's
how to hit the ground running:
1) Compile AFL with 'make'. If the build fails, see docs/INSTALL for tips.
2) Find or write a reasonably fast and simple program that takes data from
a file or stdin, processes it in a test-worthy way, then exits cleanly.
If testing a network service, modify it to run in the foreground and read
from stdin. When fuzzing a format that uses checksums, comment out the
checksum verification code, too.
The program must crash properly when a fault is encountered. Watch out for
custom SIGSEGV or SIGABRT handlers and background processes. For tips on
detecting non-crashing flaws, see section 11 in docs/README.
3) Compile the program / library to be fuzzed using afl-gcc. A common way to
do this would be:
CC=/path/to/afl-gcc CXX=/path/to/afl-g++ ./configure --disable-shared
make clean all
If the program build fails, ping <afl-users@googlegroups.com>.
4) Get a small but valid input file that makes sense to the program. When
fuzzing verbose syntax (SQL, HTTP, etc), create a dictionary as described in
dictionaries/README.dictionaries, too.
5) If the program reads from stdin, run 'afl-fuzz' like so:
./afl-fuzz -i testcase_dir -o findings_dir -- \
/path/to/tested/program [...program's cmdline...]
If the program takes input from a file, you can put @@ in the program's
command line; AFL will put an auto-generated file name in there for you.
6) Investigate anything shown in red in the fuzzer UI by promptly consulting
docs/status_screen.txt.
That's it. Sit back, relax, and - time permitting - try to skim through the
following files:
- docs/README - A general introduction to AFL,
- docs/perf_tips.txt - Simple tips on how to fuzz more quickly,
- docs/status_screen.txt - An explanation of the tidbits shown in the UI,
- docs/parallel_fuzzing.txt - Advice on running AFL on multiple cores.

@ -0,0 +1,281 @@
=======================
Environmental variables
=======================
This document discusses the environment variables used by American Fuzzy Lop
to expose various exotic functions that may be (rarely) useful for power
users or for some types of custom fuzzing setups. See README for the general
instruction manual.
1) Settings for afl-gcc, afl-clang, and afl-as
----------------------------------------------
Because they can't directly accept command-line options, the compile-time
tools make fairly broad use of environmental variables:
- Setting AFL_HARDEN automatically adds code hardening options when invoking
the downstream compiler. This currently includes -D_FORTIFY_SOURCE=2 and
-fstack-protector-all. The setting is useful for catching non-crashing
memory bugs at the expense of a very slight (sub-5%) performance loss.
- By default, the wrapper appends -O3 to optimize builds. Very rarely, this
will cause problems in programs built with -Werror, simply because -O3
enables more thorough code analysis and can spew out additional warnings.
To disable optimizations, set AFL_DONT_OPTIMIZE.
- Setting AFL_USE_ASAN automatically enables ASAN, provided that your
compiler supports that. Note that fuzzing with ASAN is mildly challenging
- see notes_for_asan.txt.
(You can also enable MSAN via AFL_USE_MSAN; ASAN and MSAN come with the
same gotchas; the modes are mutually exclusive. UBSAN and other exotic
sanitizers are not officially supported yet, but are easy to get to work
by hand.)
- Setting AFL_CC, AFL_CXX, and AFL_AS lets you use alternate downstream
compilation tools, rather than the default 'clang', 'gcc', or 'as' binaries
in your $PATH.
- AFL_PATH can be used to point afl-gcc to an alternate location of afl-as.
One possible use of this is experimental/clang_asm_normalize/, which lets
you instrument hand-written assembly when compiling clang code by plugging
a normalizer into the chain. (There is no equivalent feature for GCC.)
- Setting AFL_INST_RATIO to a percentage between 0 and 100% controls the
probability of instrumenting every branch. This is (very rarely) useful
when dealing with exceptionally complex programs that saturate the output
bitmap. Examples include v8, ffmpeg, and perl.
(If this ever happens, afl-fuzz will warn you ahead of time by
displaying the "bitmap density" field in fiery red.)
Setting AFL_INST_RATIO to 0 is a valid choice. This will instrument only
the transitions between function entry points, but not individual branches.
- AFL_NO_BUILTIN causes the compiler to generate code suitable for use with
libtokencap.so (but perhaps running a bit slower than without the flag).
- TMPDIR is used by afl-as for temporary files; if this variable is not set,
the tool defaults to /tmp.
- Setting AFL_KEEP_ASSEMBLY prevents afl-as from deleting instrumented
assembly files. Useful for troubleshooting problems or understanding how
the tool works. To get them in a predictable place, try something like:
mkdir assembly_here
TMPDIR=$PWD/assembly_here AFL_KEEP_ASSEMBLY=1 make clean all
- Setting AFL_QUIET will prevent afl-cc and afl-as banners from being
displayed during compilation, in case you find them distracting.
2) Settings for afl-clang-fast
------------------------------
The native LLVM instrumentation helper accepts a subset of the settings
discussed in section #1, with the exception of:
- AFL_AS, since this toolchain does not directly invoke GNU as.
- TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are
created.
Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc,
because functions are *not* instrumented unconditionally - so low values
will have a more striking effect. For this tool, 0 is not a valid choice.
3) Settings for afl-fuzz
------------------------
The main fuzzer binary accepts several options that disable a couple of sanity
checks or alter some of the more exotic semantics of the tool:
- Setting AFL_SKIP_CPUFREQ skips the check for CPU scaling policy. This is
useful if you can't change the defaults (e.g., no root access to the
system) and are OK with some performance loss.
- Setting AFL_NO_FORKSRV disables the forkserver optimization, reverting to
fork + execve() call for every tested input. This is useful mostly when
working with unruly libraries that create threads or do other crazy
things when initializing (before the instrumentation has a chance to run).
Note that this setting inhibits some of the user-friendly diagnostics
normally done when starting up the forkserver and causes a pretty
significant performance drop.
- AFL_EXIT_WHEN_DONE causes afl-fuzz to terminate when all existing paths
have been fuzzed and there were no new finds for a while. This would be
normally indicated by the cycle counter in the UI turning green. May be
convenient for some types of automated jobs.
- Setting AFL_NO_AFFINITY disables attempts to bind to a specific CPU core
on Linux systems. This slows things down, but lets you run more instances
of afl-fuzz than would be prudent (if you really want to).
- AFL_SKIP_CRASHES causes AFL to tolerate crashing files in the input
queue. This can help with rare situations where a program crashes only
intermittently, but it's not really recommended under normal operating
conditions.
- Setting AFL_HANG_TMOUT allows you to specify a different timeout for
deciding if a particular test case is a "hang". The default is 1 second
or the value of the -t parameter, whichever is larger. Dialing the value
down can be useful if you are very concerned about slow inputs, or if you
don't want AFL to spend too much time classifying that stuff and just
rapidly put all timeouts in that bin.
- AFL_NO_ARITH causes AFL to skip most of the deterministic arithmetics.
This can be useful to speed up the fuzzing of text-based file formats.
- AFL_SHUFFLE_QUEUE randomly reorders the input queue on startup. Requested
by some users for unorthodox parallelized fuzzing setups, but not
advisable otherwise.
- When developing custom instrumentation on top of afl-fuzz, you can use
AFL_SKIP_BIN_CHECK to inhibit the checks for non-instrumented binaries
and shell scripts; and AFL_DUMB_FORKSRV in conjunction with the -n
setting to instruct afl-fuzz to still follow the fork server protocol
without expecting any instrumentation data in return.
- When running in the -M or -S mode, setting AFL_IMPORT_FIRST causes the
fuzzer to import test cases from other instances before doing anything
else. This makes the "own finds" counter in the UI more accurate.
Beyond counter aesthetics, not much else should change.
- Setting AFL_POST_LIBRARY allows you to configure a postprocessor for
mutated files - say, to fix up checksums. See experimental/post_library/
for more.
- AFL_FAST_CAL makes the calibration stage about 2.5x faster (albeit less
precise), which can help when starting a session against a slow target.
- The CPU widget shown at the bottom of the screen is fairly simplistic and
may complain of high load prematurely, especially on systems with low core
counts. To avoid the alarming red color, you can set AFL_NO_CPU_RED.
- In QEMU mode (-Q), AFL_PATH will be searched for afl-qemu-trace.
- Setting AFL_PRELOAD causes AFL to set LD_PRELOAD for the target binary
without disrupting the afl-fuzz process itself. This is useful, among other
things, for bootstrapping libdislocator.so.
- Setting AFL_NO_UI inhibits the UI altogether, and just periodically prints
some basic stats. This behavior is also automatically triggered when the
output from afl-fuzz is redirected to a file or to a pipe.
- If you are Jakub, you may need AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES.
Others need not apply.
- Benchmarking only: AFL_BENCH_JUST_ONE causes the fuzzer to exit after
processing the first queue entry; and AFL_BENCH_UNTIL_CRASH causes it to
exit soon after the first crash is found.
4) Settings for afl-qemu-trace
------------------------------
The QEMU wrapper used to instrument binary-only code supports several settings:
- It is possible to set AFL_INST_RATIO to skip the instrumentation on some
of the basic blocks, which can be useful when dealing with very complex
binaries.
- Setting AFL_INST_LIBS causes the translator to also instrument the code
inside any dynamically linked libraries (notably including glibc).
- The underlying QEMU binary will recognize any standard "user space
emulation" variables (e.g., QEMU_STACK_SIZE), but there should be no
reason to touch them.
5) Settings for afl-cmin
------------------------
The corpus minimization script offers very little customization:
- Setting AFL_PATH offers a way to specify the location of afl-showmap
and afl-qemu-trace (the latter only in -Q mode).
- AFL_KEEP_TRACES makes the tool keep traces and other metadata used for
minimization and normally deleted at exit. The files can be found in the
<out_dir>/.traces/*.
- AFL_ALLOW_TMP permits this and some other scripts to run in /tmp. This is
a modest security risk on multi-user systems with rogue users, but should
be safe on dedicated fuzzing boxes.
6) Settings for afl-tmin
------------------------
Virtually nothing to play with. Well, in QEMU mode (-Q), AFL_PATH will be
searched for afl-qemu-trace. In addition to this, TMPDIR may be used if a
temporary file can't be created in the current working directory.
You can specify AFL_TMIN_EXACT if you want afl-tmin to require execution paths
to match when minimizing crashes. This will make minimization less useful, but
may prevent the tool from "jumping" from one crashing condition to another in
very buggy software. You probably want to combine it with the -e flag.
7) Settings for afl-analyze
---------------------------
You can set AFL_ANALYZE_HEX to get file offsets printed as hexadecimal instead
of decimal.
8) Settings for libdislocator.so
--------------------------------
The library honors four environmental variables:
- AFL_LD_LIMIT_MB caps the size of the maximum heap usage permitted by the
library, in megabytes. The default value is 1 GB. Once this is exceeded,
allocations will return NULL.
- AFL_LD_HARD_FAIL alters the behavior by calling abort() on excessive
allocations, thus causing what AFL would perceive as a crash. Useful for
programs that are supposed to maintain a specific memory footprint.
- AFL_LD_VERBOSE causes the library to output some diagnostic messages
that may be useful for pinpointing the cause of any observed issues.
- AFL_LD_NO_CALLOC_OVER inhibits abort() on calloc() overflows. Most
of the common allocators check for that internally and return NULL, so
it's a security risk only in more exotic setups.
9) Settings for libtokencap.so
------------------------------
This library accepts AFL_TOKEN_FILE to indicate the location to which the
discovered tokens should be written.
10) Third-party variables set by afl-fuzz & other tools
-------------------------------------------------------
Several variables are not directly interpreted by afl-fuzz, but are set to
optimal values if not already present in the environment:
- By default, LD_BIND_NOW is set to speed up fuzzing by forcing the
linker to do all the work before the fork server kicks in. You can
override this by setting LD_BIND_LAZY beforehand, but it is almost
certainly pointless.
- By default, ASAN_OPTIONS are set to:
abort_on_error=1
detect_leaks=0
symbolize=0
allocator_may_return_null=1
If you want to set your own options, be sure to include abort_on_error=1 -
otherwise, the fuzzer will not be able to detect crashes in the tested
app. Similarly, include symbolize=0, since without it, AFL may have
difficulty telling crashes and hangs apart.
- In the same vein, by default, MSAN_OPTIONS are set to:
exit_code=86 (required for legacy reasons)
abort_on_error=1
symbolize=0
msan_track_origins=0
allocator_may_return_null=1
Be sure to include the first one when customizing anything, since some
MSAN versions don't call abort() on error, and we need a way to detect
faults.

@ -0,0 +1,147 @@
================
Historical notes
================
This doc talks about the rationale of some of the high-level design decisions
for American Fuzzy Lop. It's adapted from a discussion with Rob Graham.
See README for the general instruction manual, and technical_details.txt for
additional implementation-level insights.
1) Influences
-------------
In short, afl-fuzz is inspired chiefly by the work done by Tavis Ormandy back
in 2007. Tavis did some very persuasive experiments using gcov block coverage
to select optimal test cases out of a large corpus of data, and then using
them as a starting point for traditional fuzzing workflows.
(By "persuasive", I mean: netting a significant number of interesting
vulnerabilities.)
In parallel to this, both Tavis and I were interested in evolutionary fuzzing.
Tavis had his experiments, and I was working on a tool called bunny-the-fuzzer,
released sometime in 2007.
Bunny used a generational algorithm not much different from afl-fuzz, but
also tried to reason about the relationship between various input bits and
the internal state of the program, with hopes of deriving some additional value
from that. The reasoning / correlation part was probably in part inspired by
other projects done around the same time by Will Drewry and Chris Evans.
The state correlation approach sounded very sexy on paper, but ultimately, made
the fuzzer complicated, brittle, and cumbersome to use; every other target
program would require a tweak or two. Because Bunny didn't fare a whole lot
better than less sophisticated brute-force tools, I eventually decided to write
it off. You can still find its original documentation at:
https://code.google.com/p/bunny-the-fuzzer/wiki/BunnyDoc
There has been a fair amount of independent work, too. Most notably, a few
weeks earlier that year, Jared DeMott had a Defcon presentation about a
coverage-driven fuzzer that relied on coverage as a fitness function.
Jared's approach was by no means identical to what afl-fuzz does, but it was in
the same ballpark. His fuzzer tried to explicitly solve for the maximum coverage
with a single input file; in comparison, afl simply selects for cases that do
something new (which yields better results - see technical_details.txt).
A few years later, Gabriel Campana released fuzzgrind, a tool that relied purely
on Valgrind and a constraint solver to maximize coverage without any brute-force
bits; and Microsoft Research folks talked extensively about their still
non-public, solver-based SAGE framework.
In the past six years or so, I've also seen a fair number of academic papers
that dealt with smart fuzzing (focusing chiefly on symbolic execution) and a
couple papers that discussed proof-of-concept applications of genetic
algorithms with the same goals in mind. I'm unconvinced how practical most of
these experiments were; I suspect that many of them suffer from the
bunny-the-fuzzer's curse of being cool on paper and in carefully designed
experiments, but failing the ultimate test of being able to find new,
worthwhile security bugs in otherwise well-fuzzed, real-world software.
In some ways, the baseline that the "cool" solutions have to compete against is
a lot more impressive than it may seem, making it difficult for competitors to
stand out. For a singular example, check out the work by Gynvael and Mateusz
Jurczyk, applying "dumb" fuzzing to ffmpeg, a prominent and security-critical
component of modern browsers and media players:
http://googleonlinesecurity.blogspot.com/2014/01/ffmpeg-and-thousand-fixes.html
Effortlessly getting comparable results with state-of-the-art symbolic execution
in equally complex software still seems fairly unlikely, and hasn't been
demonstrated in practice so far.
But I digress; ultimately, attribution is hard, and glorifying the fundamental
concepts behind AFL is probably a waste of time. The devil is very much in the
often-overlooked details, which brings us to...
2) Design goals for afl-fuzz
----------------------------
In short, I believe that the current implementation of afl-fuzz takes care of
several itches that seemed impossible to scratch with other tools:
1) Speed. It's genuinely hard to compete with brute force when your "smart"
approach is resource-intensive. If your instrumentation makes it 10x more
likely to find a bug, but runs 100x slower, your users are getting a bad
deal.
To avoid starting with a handicap, afl-fuzz is meant to let you fuzz most of
the intended targets at roughly their native speed - so even if it doesn't
add value, you do not lose much.
On top of this, the tool leverages instrumentation to actually reduce the
amount of work in a couple of ways: for example, by carefully trimming the
corpus or skipping non-functional but non-trimmable regions in the input
files.
2) Rock-solid reliability. It's hard to compete with brute force if your
approach is brittle and fails unexpectedly. Automated testing is attractive
because it's simple to use and scalable; anything that goes against these
principles is an unwelcome trade-off and means that your tool will be used
less often and with less consistent results.
Most of the approaches based on symbolic execution, taint tracking, or
complex syntax-aware instrumentation are currently fairly unreliable with
real-world targets. Perhaps more importantly, their failure modes can render
them strictly worse than "dumb" tools, and such degradation can be difficult
for less experienced users to notice and correct.
In contrast, afl-fuzz is designed to be rock solid, chiefly by keeping it
simple. In fact, at its core, it's designed to be just a very good
traditional fuzzer with a wide range of interesting, well-researched
strategies to go by. The fancy parts just help it focus the effort in
places where it matters the most.
3) Simplicity. The author of a testing framework is probably the only person
who truly understands the impact of all the settings offered by the tool -
and who can dial them in just right. Yet, even the most rudimentary fuzzer
frameworks often come with countless knobs and fuzzing ratios that need to
be guessed by the operator ahead of time. This can do more harm than
good.
AFL is designed to avoid this as much as possible. The three knobs you
can play with are the output file, the memory limit, and the ability to
override the default, auto-calibrated timeout. The rest is just supposed to
work. When it doesn't, user-friendly error messages outline the probable
causes and workarounds, and get you back on track right away.
4) Chainability. Most general-purpose fuzzers can't be easily employed
against resource-hungry or interaction-heavy tools, necessitating the
creation of custom in-process fuzzers or the investment of massive CPU
power (most of which is wasted on tasks not directly related to the code
we actually want to test).
AFL tries to scratch this itch by allowing users to use more lightweight
targets (e.g., standalone image parsing libraries) to create small
corpora of interesting test cases that can be fed into a manual testing
process or a UI harness later on.
As mentioned in technical_details.txt, AFL does all this not by systematically
applying a single overarching CS concept, but by experimenting with a variety
of small, complementary methods that were shown to reliably yield results
better than chance. The use of instrumentation is a part of that toolkit, but is
far from being the most important one.
Ultimately, what matters is that afl-fuzz is designed to find cool bugs - and
has a pretty robust track record of doing just that.

@ -0,0 +1,128 @@
# ===================
# AFL "Life Pro Tips"
# ===================
#
# Bite-sized advice for those who understand the basics, but can't be bothered
# to read or memorize every other piece of documentation for AFL.
#
%
Get more bang for your buck by using fuzzing dictionaries.
See dictionaries/README.dictionaries to learn how.
%
You can get the most out of your hardware by parallelizing AFL jobs.
See docs/parallel_fuzzing.txt for step-by-step tips.
%
Improve the odds of spotting memory corruption bugs with libdislocator.so!
It's easy. Consult libdislocator/README.dislocator for usage tips.
%
Want to understand how your target parses a particular input file?
Try the bundled afl-analyze tool; it's got colors and all!
%
You can visually monitor the progress of your fuzzing jobs.
Run the bundled afl-plot utility to generate browser-friendly graphs.
%
Need to monitor AFL jobs programmatically? Check out the fuzzer_stats file
in the AFL output dir or try afl-whatsup.
%
Puzzled by something showing up in red or purple in the AFL UI?
It could be important - consult docs/status_screen.txt right away!
%
Know your target? Convert it to persistent mode for a huge performance gain!
Consult section #5 in llvm_mode/README.llvm for tips.
%
Using clang? Check out llvm_mode/ for a faster alternative to afl-gcc!
%
Did you know that AFL can fuzz closed-source or cross-platform binaries?
Check out qemu_mode/README.qemu for more.
%
Did you know that afl-fuzz can minimize any test case for you?
Try the bundled afl-tmin tool - and get small repro files fast!
%
Not sure if a crash is exploitable? AFL can help you figure it out. Specify
-C to enable the peruvian were-rabbit mode. See section #10 in README for more.
%
Trouble dealing with a machine uprising? Relax, we've all been there.
Find essential survival tips at http://lcamtuf.coredump.cx/prep/.
%
AFL-generated corpora can be used to power other testing processes.
See section #2 in README for inspiration - it tends to pay off!
%
Want to automatically spot non-crashing memory handling bugs?
Try running an AFL-generated corpus through ASAN, MSAN, or Valgrind.
%
Good selection of input files is critical to a successful fuzzing job.
See section #5 in README (or docs/perf_tips.txt) for pro tips.
%
You can improve the odds of automatically spotting stack corruption issues.
Specify AFL_HARDEN=1 in the environment to enable hardening flags.
%
Bumping into problems with non-reproducible crashes? It happens, but usually
isn't hard to diagnose. See section #7 in README for tips.
%
Fuzzing is not just about memory corruption issues in the codebase. Add some
sanity-checking assert() / abort() statements to effortlessly catch logic bugs.
%
Hey kid... pssst... want to figure out how AFL really works?
Check out docs/technical_details.txt for all the gory details in one place!
%
There's a ton of third-party helper tools designed to work with AFL!
Be sure to check out docs/sister_projects.txt before writing your own.
%
Need to fuzz the command-line arguments of a particular program?
You can find a simple solution in experimental/argv_fuzzing.
%
Attacking a format that uses checksums? Remove the checksum-checking code or
use a postprocessor! See experimental/post_library/ for more.
%
Dealing with a very slow target or hoping for instant results? Specify -d
when calling afl-fuzz!
%

@ -0,0 +1,143 @@
==================================
Notes for using ASAN with afl-fuzz
==================================
This file discusses some of the caveats for fuzzing under ASAN, and suggests
a handful of alternatives. See README for the general instruction manual.
1) Short version
----------------
ASAN on 64-bit systems requests a lot of memory in a way that can't be easily
distinguished from a misbehaving program bent on crashing your system.
Because of this, fuzzing with ASAN is recommended only in four scenarios:
- On 32-bit systems, where we can always enforce a reasonable memory limit
(-m 800 or so is a good starting point),
- On 64-bit systems only if you can do one of the following:
- Compile the binary in 32-bit mode (gcc -m32),
- Precisely gauge memory needs using http://jwilk.net/software/recidivm .
- Limit the memory available to the process using cgroups on Linux (see
experimental/asan_cgroups).
To compile with ASAN, set AFL_USE_ASAN=1 before calling 'make clean all'. The
afl-gcc / afl-clang wrappers will pick that up and add the appropriate flags.
Note that ASAN is incompatible with -static, so be mindful of that.
(You can also use AFL_USE_MSAN=1 to enable MSAN instead.)
There is also the option of generating a corpus using a non-ASAN binary, and
then feeding it to an ASAN-instrumented one to check for bugs. This is faster,
and can give you somewhat comparable results. You can also try using
libdislocator (see libdislocator/README.dislocator in the parent directory) as a
lightweight and hassle-free (but less thorough) alternative.
2) Long version
---------------
ASAN allocates a huge region of virtual address space for bookkeeping purposes.
Most of this is never actually accessed, so the OS never has to allocate any
real pages of memory for the process, and the VM grabbed by ASAN is essentially
"free" - but the mapping counts against the standard OS-enforced limit
(RLIMIT_AS, aka ulimit -v).
On our end, afl-fuzz tries to protect you from processes that go off-rails
and start consuming all the available memory in a vain attempt to parse a
malformed input file. This happens surprisingly often, so enforcing such a limit
is important for almost any fuzzer: the alternative is for the kernel OOM
handler to step in and start killing random processes to free up resources.
Needless to say, that's not a very nice prospect to live with.
Unfortunately, un*x systems offer no portable way to limit the amount of
pages actually given to a process in a way that distinguishes between that
and the harmless "land grab" done by ASAN. In principle, there are three standard
ways to limit the size of the heap:
- The RLIMIT_AS mechanism (ulimit -v) caps the size of the virtual space -
but as noted, this pays no attention to the number of pages actually
in use by the process, and doesn't help us here.
- The RLIMIT_DATA mechanism (ulimit -d) seems like a good fit, but it applies
only to the traditional sbrk() / brk() methods of requesting heap space;
modern allocators, including the one in glibc, routinely rely on mmap()
instead, and circumvent this limit completely.
- Finally, the RLIMIT_RSS limit (ulimit -m) sounds like what we need, but
doesn't work on Linux - mostly because nobody felt like implementing it.
There are also cgroups, but they are Linux-specific, not universally available
even on Linux systems, and they require root permissions to set up; I'm a bit
hesitant to make afl-fuzz require root permissions just for that. That said,
if you are on Linux and want to use cgroups, check out the contributed script
that ships in experimental/asan_cgroups/.
In settings where cgroups aren't available, we have no nice, portable way to
avoid counting the ASAN allocation toward the limit. On 32-bit systems, or for
binaries compiled in 32-bit mode (-m32), this is not a big deal: ASAN needs
around 600-800 MB or so, depending on the compiler - so all you need to do is
to specify -m that is a bit higher than that.
On 64-bit systems, the situation is more murky, because the ASAN allocation
is completely outlandish - around 17.5 TB in older versions, and closer to
20 TB with newest ones. The actual amount of memory on your system is
(probably!) just a tiny fraction of that - so unless you dial the limit
with surgical precision, you will get no protection from OOM bugs.
On my system, the amount of memory grabbed by ASAN with a slightly older
version of gcc is around 17,825,850 MB; for newest clang, it's 20,971,600.
But there is no guarantee that these numbers are stable, and if you get them
wrong by "just" a couple gigs or so, you will be at risk.
To get the precise number, you can use the recidivm tool developed by Jakub
Wilk (http://jwilk.net/software/recidivm). In absence of this, ASAN is *not*
recommended when fuzzing 64-bit binaries, unless you are confident that they
are robust and enforce reasonable memory limits (in which case, you can
specify '-m none' when calling afl-fuzz).
Using recidivm or running with no limits aside, there are two other decent
alternatives: build a corpus of test cases using a non-ASAN binary, and then
examine them with ASAN, Valgrind, or other heavy-duty tools in a more
controlled setting; or compile the target program with -m32 (32-bit mode)
if your system supports that.
3) Interactions with the QEMU mode
----------------------------------
ASAN, MSAN, and other sanitizers appear to be incompatible with QEMU user
emulation, so please do not try to use them with the -Q option; QEMU doesn't
seem to appreciate the shadow VM trick used by these tools, and will likely
just allocate all your physical memory, then crash.
4) ASAN and OOM crashes
-----------------------
By default, ASAN treats memory allocation failures as fatal errors, immediately
causing the program to crash. Since this is a departure from normal POSIX
semantics (and creates the appearance of security issues in otherwise
properly-behaving programs), we try to disable this by specifying
allocator_may_return_null=1 in ASAN_OPTIONS.
Unfortunately, it's been reported that this setting still causes ASAN to
trigger phantom crashes in situations where the standard allocator would
simply return NULL. If this is interfering with your fuzzing jobs, you may
want to cc: yourself on this bug:
https://bugs.llvm.org/show_bug.cgi?id=22026
5) What about UBSAN?
--------------------
Some folks expressed interest in fuzzing with UBSAN. This isn't officially
supported, because many installations of UBSAN don't offer a consistent way
to abort() on fault conditions or to terminate with a distinctive exit code.
That said, some versions of the library can be binary-patched to address this
issue, while newer releases support explicit compile-time flags - see this
mailing list thread for tips:
https://groups.google.com/forum/#!topic/afl-users/GyeSBJt4M38

@ -0,0 +1,216 @@
=========================
Tips for parallel fuzzing
=========================
This document talks about synchronizing afl-fuzz jobs on a single machine
or across a fleet of systems. See README for the general instruction manual.
1) Introduction
---------------
Every copy of afl-fuzz will take up one CPU core. This means that on an
n-core system, you can almost always run around n concurrent fuzzing jobs with
virtually no performance hit (you can use the afl-gotcpu tool to make sure).
In fact, if you rely on just a single job on a multi-core system, you will
be underutilizing the hardware. So, parallelization is usually the right
way to go.
When targeting multiple unrelated binaries or using the tool in "dumb" (-n)
mode, it is perfectly fine to just start up several fully separate instances
of afl-fuzz. The picture gets more complicated when you want to have multiple
fuzzers hammering a common target: if a hard-to-hit but interesting test case
is synthesized by one fuzzer, the remaining instances will not be able to use
that input to guide their work.
To help with this problem, afl-fuzz offers a simple way to synchronize test
cases on the fly.
2) Single-system parallelization
--------------------------------
If you wish to parallelize a single job across multiple cores on a local
system, simply create a new, empty output directory ("sync dir") that will be
shared by all the instances of afl-fuzz; and then come up with a naming scheme
for every instance - say, "fuzzer01", "fuzzer02", etc.
Run the first one ("master", -M) like this:
$ ./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 [...other stuff...]
...and then, start up secondary (-S) instances like this:
$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 [...other stuff...]
$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer03 [...other stuff...]
Each fuzzer will keep its state in a separate subdirectory, like so:
/path/to/sync_dir/fuzzer01/
Each instance will also periodically rescan the top-level sync directory
for any test cases found by other fuzzers - and will incorporate them into
its own fuzzing when they are deemed interesting enough.
The difference between the -M and -S modes is that the master instance will
still perform deterministic checks; while the secondary instances will
proceed straight to random tweaks. If you don't want to do deterministic
fuzzing at all, it's OK to run all instances with -S. With very slow or complex
targets, or when running heavily parallelized jobs, this is usually a good plan.
Note that running multiple -M instances is wasteful, although there is
experimental support for parallelizing the deterministic checks. To leverage
that, you need to create -M instances like so:
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterA:1/3 [...]
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterB:2/3 [...]
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterC:3/3 [...]
...where the first value after ':' is the sequential ID of a particular master
instance (starting at 1), and the second value is the total number of fuzzers to
distribute the deterministic fuzzing across. Note that if you boot up fewer
fuzzers than indicated by the second number passed to -M, you may end up with
poor coverage.
You can also monitor the progress of your jobs from the command line with the
provided afl-whatsup tool. When the instances are no longer finding new paths,
it's probably time to stop.
WARNING: Exercise caution when explicitly specifying the -f option. Each fuzzer
must use a separate temporary file; otherwise, things will go south. One safe
example may be:
$ ./afl-fuzz [...] -S fuzzer10 -f file10.txt ./fuzzed/binary @@
$ ./afl-fuzz [...] -S fuzzer11 -f file11.txt ./fuzzed/binary @@
$ ./afl-fuzz [...] -S fuzzer12 -f file12.txt ./fuzzed/binary @@
This is not a concern if you use @@ without -f and let afl-fuzz come up with the
file name.
3) Multi-system parallelization
-------------------------------
The basic operating principle for multi-system parallelization is similar to
the mechanism explained in section 2. The key difference is that you need to
write a simple script that performs two actions:
- Uses SSH with authorized_keys to connect to every machine and retrieve
a tar archive of the /path/to/sync_dir/<fuzzer_id>/queue/ directories for
every <fuzzer_id> local to the machine. It's best to use a naming scheme
that includes host name in the fuzzer ID, so that you can do something
like:
for s in {1..10}; do
ssh user@host${s} "tar -czf - sync/host${s}_fuzzid*/[qf]*" >host${s}.tgz
done
- Distributes and unpacks these files on all the remaining machines, e.g.:
for s in {1..10}; do
for d in {1..10}; do
test "$s" = "$d" && continue
ssh user@host${d} 'tar -kxzf -' <host${s}.tgz
done
done
There is an example of such a script in experimental/distributed_fuzzing/;
you can also find a more featured, experimental tool developed by
Martijn Bogaard at:
https://github.com/MartijnB/disfuzz-afl
Another client-server implementation from Richo Healey is:
https://github.com/richo/roving
Note that these third-party tools are unsafe to run on systems exposed to the
Internet or to untrusted users.
When developing custom test case sync code, there are several optimizations
to keep in mind:
- The synchronization does not have to happen very often; running the
task every 30 minutes or so may be perfectly fine.
- There is no need to synchronize crashes/ or hangs/; you only need to
copy over queue/* (and ideally, also fuzzer_stats).
- It is not necessary (and not advisable!) to overwrite existing files;
the -k option in tar is a good way to avoid that.
- There is no need to fetch directories for fuzzers that are not running
locally on a particular machine, and were simply copied over onto that
system during earlier runs.
- For large fleets, you will want to consolidate tarballs for each host,
as this will let you use n SSH connections for sync, rather than n*(n-1).
You may also want to implement staged synchronization. For example, you
could have 10 groups of systems, with group 1 pushing test cases only
to group 2; group 2 pushing them only to group 3; and so on, with group
10 eventually feeding back to group 1.
This arrangement would allow interesting test cases to propagate across
the fleet without having to copy every fuzzer queue to every single host.
- You do not want a "master" instance of afl-fuzz on every system; you should
run them all with -S, and just designate a single process somewhere within
the fleet to run with -M.
It is *not* advisable to skip the synchronization script and run the fuzzers
directly on a network filesystem; unexpected latency and unkillable processes
in I/O wait state can mess things up.
4) Remote monitoring and data collection
----------------------------------------
You can use screen, nohup, tmux, or something equivalent to run remote
instances of afl-fuzz. If you redirect the program's output to a file, it will
automatically switch from a fancy UI to more limited status reports. There is
also basic machine-readable information always written to the fuzzer_stats file
in the output directory. Locally, that information can be interpreted with
afl-whatsup.
In principle, you can use the status screen of the master (-M) instance to
monitor the overall fuzzing progress and decide when to stop. In this
mode, the most important signal is just that no new paths are being found
for a longer while. If you do not have a master instance, just pick any
single secondary instance to watch and go by that.
You can also rely on that instance's output directory to collect the
synthesized corpus that covers all the noteworthy paths discovered anywhere
within the fleet. Secondary (-S) instances do not require any special
monitoring, other than just making sure that they are up.
Keep in mind that crashing inputs are *not* automatically propagated to the
master instance, so you may still want to monitor for crashes fleet-wide
from within your synchronization or health checking scripts (see afl-whatsup).
5) Asymmetric setups
--------------------
It is perhaps worth noting that all of the following is permitted:
- Running afl-fuzz in conjunction with other guided tools that can extend
coverage (e.g., via concolic execution). Third-party tools simply need to
follow the protocol described above for pulling new test cases from
out_dir/<fuzzer_id>/queue/* and writing their own finds to sequentially
numbered id:nnnnnn files in out_dir/<ext_tool_id>/queue/*.
- Running some of the synchronized fuzzers with different (but related)
target binaries. For example, simultaneously stress-testing several
different JPEG parsers (say, IJG jpeg and libjpeg-turbo) while sharing
the discovered test cases can have synergistic effects and improve the
overall coverage.
(In this case, running one -M instance per each binary is a good plan.)
- Having some of the fuzzers invoke the binary in different ways.
For example, 'djpeg' supports several DCT modes, configurable with
a command-line flag, while 'dwebp' supports incremental and one-shot
decoding. In some scenarios, going after multiple distinct modes and then
pooling test cases will improve coverage.
- Much less convincingly, running the synchronized fuzzers with different
starting test cases (e.g., progressive and standard JPEG) or dictionaries.
The synchronization mechanism ensures that the test sets will get fairly
homogeneous over time, but it introduces some initial variability.

@ -0,0 +1,201 @@
=================================
Tips for performance optimization
=================================
This file provides tips for troubleshooting slow or wasteful fuzzing jobs.
See README for the general instruction manual.
1) Keep your test cases small
-----------------------------
This is probably the single most important step to take! Large test cases do
not merely take more time and memory to be parsed by the tested binary, but
also make the fuzzing process dramatically less efficient in several other
ways.
To illustrate, let's say that you're randomly flipping bits in a file, one bit
at a time. Let's assume that if you flip bit #47, you will hit a security bug;
flipping any other bit just results in an invalid document.
Now, if your starting test case is 100 bytes long, you will have a 71% chance of
triggering the bug within the first 1,000 execs - not bad! But if the test case
is 1 kB long, the probability that we will randomly hit the right pattern in
the same timeframe goes down to 11%. And if it has 10 kB of non-essential
cruft, the odds plunge to 1%.
On top of that, with larger inputs, the binary may be now running 5-10x
slower than before - so the overall drop in fuzzing efficiency may be easily
as high as 500x or so.
In practice, this means that you shouldn't fuzz image parsers with your
vacation photos. Generate a tiny 16x16 picture instead, and run it through
jpegtran or pngcrush for good measure. The same goes for most other types
of documents.
There's plenty of small starting test cases in ../testcases/* - try them out
or submit new ones!
If you want to start with a larger, third-party corpus, run afl-cmin with an
aggressive timeout on that data set first.
2) Use a simpler target
-----------------------
Consider using a simpler target binary in your fuzzing work. For example, for
image formats, bundled utilities such as djpeg, readpng, or gifhisto are
considerably (10-20x) faster than the convert tool from ImageMagick - all while
exercising roughly the same library-level image parsing code.
Even if you don't have a lightweight harness for a particular target, remember
that you can always use another, related library to generate a corpus that will
be then manually fed to a more resource-hungry program later on.
3) Use LLVM instrumentation
---------------------------
When fuzzing slow targets, you can gain a 2x performance improvement by using
the LLVM-based instrumentation mode described in llvm_mode/README.llvm. Note
that this mode requires the use of clang and will not work with GCC.
The LLVM mode also offers a "persistent", in-process fuzzing mode that can
work well for certain types of self-contained libraries, and for fast targets,
can offer performance gains up to 5-10x; and a "deferred fork server" mode
that can offer huge benefits for programs with high startup overhead. Both
modes require you to edit the source code of the fuzzed program, but the
changes often amount to just strategically placing a single line or two.
4) Profile and optimize the binary
----------------------------------
Check for any parameters or settings that obviously improve performance. For
example, the djpeg utility that comes with IJG jpeg and libjpeg-turbo can be
called with:
-dct fast -nosmooth -onepass -dither none -scale 1/4
...and that will speed things up. There is a corresponding drop in the quality
of decoded images, but it's probably not something you care about.
In some programs, it is possible to disable output altogether, or at least use
an output format that is computationally inexpensive. For example, with image
transcoding tools, converting to a BMP file will be a lot faster than to PNG.
With some laid-back parsers, enabling "strict" mode (i.e., bailing out after
first error) may result in smaller files and improved run time without
sacrificing coverage; for example, for sqlite, you may want to specify -bail.
If the program is still too slow, you can use strace -tt or an equivalent
profiling tool to see if the targeted binary is doing anything silly.
Sometimes, you can speed things up simply by specifying /dev/null as the
config file, or disabling some compile-time features that aren't really needed
for the job (try ./configure --help). One of the notoriously resource-consuming
things would be calling other utilities via exec*(), popen(), system(), or
equivalent calls; for example, tar can invoke external decompression tools
when it decides that the input file is a compressed archive.
Some programs may also intentionally call sleep(), usleep(), or nanosleep();
vim is a good example of that. Other programs may attempt fsync() and so on.
There are third-party libraries that make it easy to get rid of such code,
e.g.:
https://launchpad.net/libeatmydata
In programs that are slow due to unavoidable initialization overhead, you may
want to try the LLVM deferred forkserver mode (see llvm_mode/README.llvm),
which can give you speed gains up to 10x, as mentioned above.
Last but not least, if you are using ASAN and the performance is unacceptable,
consider turning it off for now, and manually examining the generated corpus
with an ASAN-enabled binary later on.
5) Instrument just what you need
--------------------------------
Instrument just the libraries you actually want to stress-test right now, one
at a time. Let the program use system-wide, non-instrumented libraries for
any functionality you don't actually want to fuzz. For example, in most
cases, it doesn't make sense to instrument libgmp just because you're testing a
crypto app that relies on it for bignum math.
Beware of programs that come with oddball third-party libraries bundled with
their source code (Spidermonkey is a good example of this). Check ./configure
options to use non-instrumented system-wide copies instead.
6) Parallelize your fuzzers
---------------------------
The fuzzer is designed to need ~1 core per job. This means that on a, say,
4-core system, you can easily run four parallel fuzzing jobs with relatively
little performance hit. For tips on how to do that, see parallel_fuzzing.txt.
The afl-gotcpu utility can help you understand if you still have idle CPU
capacity on your system. (It won't tell you about memory bandwidth, cache
misses, or similar factors, but they are less likely to be a concern.)
7) Keep memory use and timeouts in check
----------------------------------------
If you have increased the -m or -t limits more than truly necessary, consider
dialing them back down.
For programs that are nominally very fast, but get sluggish for some inputs,
you can also try setting -t values that are more punishing than what afl-fuzz
dares to use on its own. On fast and idle machines, going down to -t 5 may be
a viable plan.
The -m parameter is worth looking at, too. Some programs can end up spending
a fair amount of time allocating and initializing megabytes of memory when
presented with pathological inputs. Low -m values can make them give up sooner
and not waste CPU time.
8) Check OS configuration
-------------------------
There are several OS-level factors that may affect fuzzing speed:
- High system load. Use idle machines where possible. Kill any non-essential
CPU hogs (idle browser windows, media players, complex screensavers, etc).
- Network filesystems, either used for fuzzer input / output, or accessed by
the fuzzed binary to read configuration files (pay special attention to the
home directory - many programs search it for dot-files).
- On-demand CPU scaling. The Linux 'ondemand' governor performs its analysis
on a particular schedule and is known to underestimate the needs of
short-lived processes spawned by afl-fuzz (or any other fuzzer). On Linux,
this can be fixed with:
cd /sys/devices/system/cpu
echo performance | tee cpu*/cpufreq/scaling_governor
On other systems, the impact of CPU scaling will be different; when fuzzing,
use OS-specific tools to find out if all cores are running at full speed.
- Transparent huge pages. Some allocators, such as jemalloc, can incur a
heavy fuzzing penalty when transparent huge pages (THP) are enabled in the
kernel. You can disable this via:
echo never > /sys/kernel/mm/transparent_hugepage/enabled
- Suboptimal scheduling strategies. The significance of this will vary from
one target to another, but on Linux, you may want to make sure that the
following options are set:
echo 1 >/proc/sys/kernel/sched_child_runs_first
echo 1 >/proc/sys/kernel/sched_autogroup_enabled
Setting a different scheduling policy for the fuzzer process - say
SCHED_RR - can usually speed things up, too, but needs to be done with
care.
9) If all other options fail, use -d
------------------------------------
For programs that are genuinely slow, in cases where you really can't escape
using huge input files, or when you simply want to get quick and dirty results
early on, you can always resort to the -d mode.
The mode causes afl-fuzz to skip all the deterministic fuzzing steps, which
makes output a lot less neat and can ultimately make the testing a bit less
in-depth, but it will give you an experience more familiar from other fuzzing
tools.

@ -0,0 +1,354 @@
===============
Sister projects
===============
This doc lists some of the projects that are inspired by, derived from,
designed for, or meant to integrate with AFL. See README for the general
instruction manual.
-------------------------------------------
Support for other languages / environments:
-------------------------------------------
Python AFL (Jakub Wilk)
-----------------------
Allows fuzz-testing of Python programs. Uses custom instrumentation and its
own forkserver.
http://jwilk.net/software/python-afl
Go-fuzz (Dmitry Vyukov)
-----------------------
AFL-inspired guided fuzzing approach for Go targets:
https://github.com/dvyukov/go-fuzz
afl.rs (Keegan McAllister)
--------------------------
Allows Rust features to be easily fuzzed with AFL (using the LLVM mode).
https://github.com/kmcallister/afl.rs
OCaml support (KC Sivaramakrishnan)
-----------------------------------
Adds AFL-compatible instrumentation to OCaml programs.
https://github.com/ocamllabs/opam-repo-dev/pull/23
http://canopy.mirage.io/Posts/Fuzzing
AFL for GCJ Java and other GCC frontends (-)
--------------------------------------------
GCC Java programs are actually supported out of the box - simply rename
afl-gcc to afl-gcj. Unfortunately, by default, unhandled exceptions in GCJ do
not result in abort() being called, so you will need to manually add a
top-level exception handler that exits with SIGABRT or something equivalent.
Other GCC-supported languages should be fairly easy to get working, but may
face similar problems. See https://gcc.gnu.org/frontends.html for a list of
options.
AFL-style in-process fuzzer for LLVM (Kostya Serebryany)
--------------------------------------------------------
Provides an evolutionary instrumentation-guided fuzzing harness that allows
some programs to be fuzzed without the fork / execve overhead. (Similar
functionality is now available as the "persistent" feature described in
../llvm_mode/README.llvm.)
http://llvm.org/docs/LibFuzzer.html
AFL fixup shim (Ben Nagy)
-------------------------
Allows AFL_POST_LIBRARY postprocessors to be written in arbitrary languages
that don't have C / .so bindings. Includes examples in Go.
https://github.com/bnagy/aflfix
TriforceAFL (Tim Newsham and Jesse Hertz)
-----------------------------------------
Leverages QEMU full system emulation mode to allow AFL to target operating
systems and other alien worlds:
https://www.nccgroup.trust/us/about-us/newsroom-and-events/blog/2016/june/project-triforce-run-afl-on-everything/
WinAFL (Ivan Fratric)
---------------------
As the name implies, allows you to fuzz Windows binaries (using DynamoRio).
https://github.com/ivanfratric/winafl
Another Windows alternative may be:
https://github.com/carlosgprado/BrundleFuzz/
----------------
Network fuzzing:
----------------
Preeny (Yan Shoshitaishvili)
----------------------------
Provides a fairly simple way to convince dynamically linked network-centric
programs to read from a file or not fork. Not AFL-specific, but described as
useful by many users. Some assembly required.
https://github.com/zardus/preeny
-------------------------------------------
Distributed fuzzing and related automation:
-------------------------------------------
roving (Richo Healey)
---------------------
A client-server architecture for effortlessly orchestrating AFL runs across
a fleet of machines. You don't want to use this on systems that face the
Internet or live in other untrusted environments.
https://github.com/richo/roving
Disfuzz-AFL (Martijn Bogaard)
-----------------------------
Simplifies the management of afl-fuzz instances on remote machines. The
author notes that the current implementation isn't secure and should not
be exposed on the Internet.
https://github.com/MartijnB/disfuzz-afl
AFLDFF (quantumvm)
------------------
A nice GUI for managing AFL jobs.
https://github.com/quantumvm/AFLDFF
afl-launch (Ben Nagy)
---------------------
Batch AFL launcher utility with a simple CLI.
https://github.com/bnagy/afl-launch
AFL Utils (rc0r)
----------------
Simplifies the triage of discovered crashes, starting parallel instances, etc.
https://github.com/rc0r/afl-utils
Another crash triage tool:
https://github.com/floyd-fuh/afl-crash-analyzer
afl-fuzzing-scripts (Tobias Ospelt)
-----------------------------------
Simplifies starting up multiple parallel AFL jobs.
https://github.com/floyd-fuh/afl-fuzzing-scripts/
afl-sid (Jacek Wielemborek)
---------------------------
Allows users to more conveniently build and deploy AFL via Docker.
https://github.com/d33tah/afl-sid
Another Docker-related project:
https://github.com/ozzyjohnson/docker-afl
afl-monitor (Paul S. Ziegler)
-----------------------------
Provides more detailed and versatile statistics about your running AFL jobs.
https://github.com/reflare/afl-monitor
-----------------------------------------------------------
Crash triage, coverage analysis, and other companion tools:
-----------------------------------------------------------
afl-crash-analyzer (Tobias Ospelt)
----------------------------------
Makes it easier to navigate and annotate crashing test cases.
https://github.com/floyd-fuh/afl-crash-analyzer/
Crashwalk (Ben Nagy)
--------------------
AFL-aware tool to annotate and sort through crashing test cases.
https://github.com/bnagy/crashwalk
afl-cov (Michael Rash)
----------------------
Produces human-readable coverage data based on the output queue of afl-fuzz.
https://github.com/mrash/afl-cov
afl-sancov (Bhargava Shastry)
-----------------------------
Similar to afl-cov, but uses clang sanitizer instrumentation.
https://github.com/bshastry/afl-sancov
RecidiVM (Jakub Wilk)
---------------------
Makes it easy to estimate memory usage limits when fuzzing with ASAN or MSAN.
http://jwilk.net/software/recidivm
aflize (Jacek Wielemborek)
--------------------------
Automatically builds AFL-enabled versions of Debian packages.
https://github.com/d33tah/aflize
afl-ddmin-mod (Markus Teufelberger)
-----------------------------------
A variant of afl-tmin that uses a more sophisticated (but slower)
minimization algorithm.
https://github.com/MarkusTeufelberger/afl-ddmin-mod
afl-kit (Kuang-che Wu)
----------------------
Replacements for afl-cmin and afl-tmin with additional features, such
as the ability to filter crashes based on stderr patterns.
https://github.com/kcwu/afl-kit
-------------------------------
Narrow-purpose or experimental:
-------------------------------
Cygwin support (Ali Rizvi-Santiago)
-----------------------------------
Pretty self-explanatory. As per the author, this "mostly" ports AFL to
Windows. Field reports welcome!
https://github.com/arizvisa/afl-cygwin
Pause and resume scripts (Ben Nagy)
-----------------------------------
Simple automation to suspend and resume groups of fuzzing jobs.
https://github.com/bnagy/afl-trivia
Static binary-only instrumentation (Aleksandar Nikolich)
--------------------------------------------------------
Allows black-box binaries to be instrumented statically (i.e., by modifying
the binary ahead of time, rather than translating it on the fly). Author
reports better performance compared to QEMU, but occasional translation
errors with stripped binaries.
https://github.com/vrtadmin/moflow/tree/master/afl-dyninst
AFL PIN (Parker Thompson)
-------------------------
Early-stage Intel PIN instrumentation support (from before we settled on
faster-running QEMU).
https://github.com/mothran/aflpin
AFL-style instrumentation in llvm (Kostya Serebryany)
-----------------------------------------------------
Allows AFL-equivalent instrumentation to be injected at compiler level.
This is currently not supported by AFL as-is, but may be useful in other
projects.
https://code.google.com/p/address-sanitizer/wiki/AsanCoverage#Coverage_counters
AFL JS (Han Choongwoo)
----------------------
One-off optimizations to speed up the fuzzing of JavaScriptCore (now likely
superseded by LLVM deferred forkserver init - see llvm_mode/README.llvm).
https://github.com/tunz/afl-fuzz-js
AFL harness for fwknop (Michael Rash)
-------------------------------------
An example of a fairly involved integration with AFL.
https://github.com/mrash/fwknop/tree/master/test/afl
Building harnesses for DNS servers (Jonathan Foote, Ron Bowes)
--------------------------------------------------------------
Two articles outlining the general principles and showing some example code.
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
https://goo.gl/j9EgFf
Fuzzer shell for SQLite (Richard Hipp)
--------------------------------------
A simple SQL shell designed specifically for fuzzing the underlying library.
http://www.sqlite.org/src/artifact/9e7e273da2030371
Support for Python mutation modules (Christian Holler)
------------------------------------------------------
https://github.com/choller/afl/blob/master/docs/mozilla/python_modules.txt
Support for selective instrumentation (Christian Holler)
--------------------------------------------------------
https://github.com/choller/afl/blob/master/docs/mozilla/partial_instrumentation.txt
Kernel fuzzing (Dmitry Vyukov)
------------------------------
A similar guided approach as applied to fuzzing syscalls:
https://github.com/google/syzkaller/wiki/Found-Bugs
https://github.com/dvyukov/linux/commit/33787098ffaaa83b8a7ccf519913ac5fd6125931
http://events.linuxfoundation.org/sites/events/files/slides/AFL%20filesystem%20fuzzing%2C%20Vault%202016_0.pdf
Android support (ele7enxxh)
---------------------------
Based on a somewhat dated version of AFL:
https://github.com/ele7enxxh/android-afl
CGI wrapper (floyd)
-------------------
Facilitates the testing of CGI scripts.
https://github.com/floyd-fuh/afl-cgi-wrapper
Fuzzing difficulty estimation (Marcel Boehme)
---------------------------------------------
A fork of AFL that tries to quantify the likelihood of finding additional
paths or crashes at any point in a fuzzing job.
https://github.com/mboehme/pythia

@ -0,0 +1,408 @@
===============================
Understanding the status screen
===============================
This document provides an overview of the status screen - plus tips for
troubleshooting any warnings and red text shown in the UI. See README for
the general instruction manual.
0) A note about colors
----------------------
The status screen and error messages use colors to keep things readable and
attract your attention to the most important details. For example, red almost
always means "consult this doc" :-)
Unfortunately, the UI will render correctly only if your terminal is using
a traditional un*x palette (white text on black background) or something close
to that.
If you are using inverse video, you may want to change your settings, say:
- For GNOME Terminal, go to Edit > Profile preferences, select the "colors"
tab, and from the list of built-in schemes, choose "white on black".
- For the MacOS X Terminal app, open a new window using the "Pro" scheme via
the Shell > New Window menu (or make "Pro" your default).
Alternatively, if you really like your current colors, you can edit config.h
to comment out USE_COLORS, then do 'make clean all'.
I'm not aware of any other simple way to make this work without causing
other side effects - sorry about that.
With that out of the way, let's talk about what's actually on the screen...
1) Process timing
-----------------
+----------------------------------------------------+
| run time : 0 days, 8 hrs, 32 min, 43 sec |
| last new path : 0 days, 0 hrs, 6 min, 40 sec |
| last uniq crash : none seen yet |
| last uniq hang : 0 days, 1 hrs, 24 min, 32 sec |
+----------------------------------------------------+
This section is fairly self-explanatory: it tells you how long the fuzzer has
been running and how much time has elapsed since its most recent finds. This is
broken down into "paths" (a shorthand for test cases that trigger new execution
patterns), crashes, and hangs.
When it comes to timing: there is no hard rule, but most fuzzing jobs should be
expected to run for days or weeks; in fact, for a moderately complex project, the
first pass will probably take a day or so. Every now and then, some jobs
will be allowed to run for months.
There's one important thing to watch out for: if the tool is not finding new
paths within several minutes of starting, you're probably not invoking the
target binary correctly and it never gets to parse the input files we're
throwing at it; other possible explanations are that the default memory limit
(-m) is too restrictive, and the program exits after failing to allocate a
buffer very early on; or that the input files are patently invalid and always
fail a basic header check.
If there are no new paths showing up for a while, you will eventually see a big
red warning in this section, too :-)
2) Overall results
------------------
+-----------------------+
| cycles done : 0 |
| total paths : 2095 |
| uniq crashes : 0 |
| uniq hangs : 19 |
+-----------------------+
The first field in this section gives you the count of queue passes done so far
- that is, the number of times the fuzzer went over all the interesting test
cases discovered so far, fuzzed them, and looped back to the very beginning.
Every fuzzing session should be allowed to complete at least one cycle; and
ideally, should run much longer than that.
As noted earlier, the first pass can take a day or longer, so sit back and
relax. If you want to get broader but more shallow coverage right away, try
the -d option - it gives you a more familiar experience by skipping the
deterministic fuzzing steps. It is, however, inferior to the standard mode in
a couple of subtle ways.
To help make the call on when to hit Ctrl-C, the cycle counter is color-coded.
It is shown in magenta during the first pass, progresses to yellow if new finds
are still being made in subsequent rounds, then blue when that ends - and
finally, turns green after the fuzzer hasn't been seeing any action for a
longer while.
The remaining fields in this part of the screen should be pretty obvious:
there's the number of test cases ("paths") discovered so far, and the number of
unique faults. The test cases, crashes, and hangs can be explored in real-time
by browsing the output directory, as discussed in the README.
3) Cycle progress
-----------------
+-------------------------------------+
| now processing : 1296 (61.86%) |
| paths timed out : 0 (0.00%) |
+-------------------------------------+
This box tells you how far along the fuzzer is with the current queue cycle: it
shows the ID of the test case it is currently working on, plus the number of
inputs it decided to ditch because they were persistently timing out.
The "*" suffix sometimes shown in the first line means that the currently
processed path is not "favored" (a property discussed later on, in section 6).
If you feel that the fuzzer is progressing too slowly, see the note about the
-d option in section 2 of this doc.
4) Map coverage
---------------
+--------------------------------------+
| map density : 10.15% / 29.07% |
| count coverage : 4.03 bits/tuple |
+--------------------------------------+
This section provides some trivia about the coverage observed by the
instrumentation embedded in the target binary.
The first line in the box tells you how many branch tuples we have already
hit, in proportion to how much the bitmap can hold. The number on the left
describes the current input; the one on the right is the value for the entire
input corpus.
Be wary of extremes:
- Absolute numbers below 200 or so suggest one of three things: that the
program is extremely simple; that it is not instrumented properly (e.g.,
due to being linked against a non-instrumented copy of the target
library); or that it is bailing out prematurely on your input test cases.
The fuzzer will try to mark this in pink, just to make you aware.
- Percentages over 70% may very rarely happen with very complex programs
that make heavy use of template-generated code.
Because high bitmap density makes it harder for the fuzzer to reliably
discern new program states, I recommend recompiling the binary with
AFL_INST_RATIO=10 or so and trying again (see env_variables.txt).
The fuzzer will flag high percentages in red. Chances are, you will never
see that unless you're fuzzing extremely hairy software (say, v8, perl,
ffmpeg).
The other line deals with the variability in tuple hit counts seen in the
binary. In essence, if every taken branch is always taken a fixed number of
times for all the inputs we have tried, this will read "1.00". As we manage
to trigger other hit counts for every branch, the needle will start to move
toward "8.00" (every bit in the 8-bit map hit), but will probably never
reach that extreme.
Together, the values can be useful for comparing the coverage of several
different fuzzing jobs that rely on the same instrumented binary.
5) Stage progress
-----------------
+-------------------------------------+
| now trying : interest 32/8 |
| stage execs : 3996/34.4k (11.62%) |
| total execs : 27.4M |
| exec speed : 891.7/sec |
+-------------------------------------+
This part gives you an in-depth peek at what the fuzzer is actually doing right
now. It tells you about the current stage, which can be any of:
- calibration - a pre-fuzzing stage where the execution path is examined
to detect anomalies, establish baseline execution speed, and so on. Executed
very briefly whenever a new find is being made.
- trim L/S - another pre-fuzzing stage where the test case is trimmed to the
shortest form that still produces the same execution path. The length (L)
and stepover (S) are chosen in general relationship to file size.
- bitflip L/S - deterministic bit flips. There are L bits toggled at any given
time, walking the input file with S-bit increments. The current L/S variants
are: 1/1, 2/1, 4/1, 8/8, 16/8, 32/8.
- arith L/8 - deterministic arithmetics. The fuzzer tries to subtract or add
small integers to 8-, 16-, and 32-bit values. The stepover is always 8 bits.
- interest L/8 - deterministic value overwrite. The fuzzer has a list of known
"interesting" 8-, 16-, and 32-bit values to try. The stepover is 8 bits.
- extras - deterministic injection of dictionary terms. This can be shown as
"user" or "auto", depending on whether the fuzzer is using a user-supplied
dictionary (-x) or an auto-created one. You will also see "over" or "insert",
depending on whether the dictionary words overwrite existing data or are
inserted by offsetting the remaining data to accommodate their length.
- havoc - a sort-of-fixed-length cycle with stacked random tweaks. The
operations attempted during this stage include bit flips, overwrites with
random and "interesting" integers, block deletion, block duplication, plus
assorted dictionary-related operations (if a dictionary is supplied in the
first place).
- splice - a last-resort strategy that kicks in after the first full queue
cycle with no new paths. It is equivalent to 'havoc', except that it first
splices together two random inputs from the queue at some arbitrarily
selected midpoint.
- sync - a stage used only when -M or -S is set (see parallel_fuzzing.txt).
No real fuzzing is involved, but the tool scans the output from other
fuzzers and imports test cases as necessary. The first time this is done,
it may take several minutes or so.
The remaining fields should be fairly self-evident: there's the exec count
progress indicator for the current stage, a global exec counter, and a
benchmark for the current program execution speed. This may fluctuate from
one test case to another, but the benchmark should ideally be over 500 execs/sec
most of the time - and if it stays below 100, the job will probably take very
long.
The fuzzer will explicitly warn you about slow targets, too. If this happens,
see the perf_tips.txt file included with the fuzzer for ideas on how to speed
things up.
6) Findings in depth
--------------------
+--------------------------------------+
| favored paths : 879 (41.96%) |
| new edges on : 423 (20.19%) |
| total crashes : 0 (0 unique) |
| total tmouts : 24 (19 unique) |
+--------------------------------------+
This gives you several metrics that are of interest mostly to complete nerds.
The section includes the number of paths that the fuzzer likes the most based
on a minimization algorithm baked into the code (these will get considerably
more air time), and the number of test cases that actually resulted in better
edge coverage (versus just pushing the branch hit counters up). There are also
additional, more detailed counters for crashes and timeouts.
Note that the timeout counter is somewhat different from the hang counter; this
one includes all test cases that exceeded the timeout, even if they did not
exceed it by a margin sufficient to be classified as hangs.
7) Fuzzing strategy yields
--------------------------
+-----------------------------------------------------+
| bit flips : 57/289k, 18/289k, 18/288k |
| byte flips : 0/36.2k, 4/35.7k, 7/34.6k |
| arithmetics : 53/2.54M, 0/537k, 0/55.2k |
| known ints : 8/322k, 12/1.32M, 10/1.70M |
| dictionary : 9/52k, 1/53k, 1/24k |
| havoc : 1903/20.0M, 0/0 |
| trim : 20.31%/9201, 17.05% |
+-----------------------------------------------------+
This is just another nerd-targeted section keeping track of how many paths we
have netted, in proportion to the number of execs attempted, for each of the
fuzzing strategies discussed earlier on. This serves to convincingly validate
assumptions about the usefulness of the various approaches taken by afl-fuzz.
The trim strategy stats in this section are a bit different than the rest.
The first number in this line shows the ratio of bytes removed from the input
files; the second one corresponds to the number of execs needed to achieve this
goal. Finally, the third number shows the proportion of bytes that, although
not possible to remove, were deemed to have no effect and were excluded from
some of the more expensive deterministic fuzzing steps.
8) Path geometry
----------------
+---------------------+
| levels : 5 |
| pending : 1570 |
| pend fav : 583 |
| own finds : 0 |
| imported : 0 |
| stability : 100.00% |
+---------------------+
The first field in this section tracks the path depth reached through the
guided fuzzing process. In essence: the initial test cases supplied by the
user are considered "level 1". The test cases that can be derived from that
through traditional fuzzing are considered "level 2"; the ones derived by
using these as inputs to subsequent fuzzing rounds are "level 3"; and so forth.
The maximum depth is therefore a rough proxy for how much value you're getting
out of the instrumentation-guided approach taken by afl-fuzz.
The next field shows you the number of inputs that have not gone through any
fuzzing yet. The same stat is also given for "favored" entries that the fuzzer
really wants to get to in this queue cycle (the non-favored entries may have to
wait a couple of cycles to get their chance).
Next, we have the number of new paths found during this fuzzing session and
imported from other fuzzer instances when doing parallelized fuzzing; and the
extent to which identical inputs appear to sometimes produce variable behavior
in the tested binary.
That last bit is actually fairly interesting: it measures the consistency of
observed traces. If a program always behaves the same for the same input data,
it will earn a score of 100%. When the value is lower but still shown in purple,
the fuzzing process is unlikely to be negatively affected. If it goes into red,
you may be in trouble, since AFL will have difficulty discerning between
meaningful and "phantom" effects of tweaking the input file.
Now, most targets will just get a 100% score, but when you see lower figures,
there are several things to look at:
- The use of uninitialized memory in conjunction with some intrinsic sources
of entropy in the tested binary. Harmless to AFL, but could be indicative
of a security bug.
- Attempts to manipulate persistent resources, such as left over temporary
files or shared memory objects. This is usually harmless, but you may want
to double-check to make sure the program isn't bailing out prematurely.
Running out of disk space, SHM handles, or other global resources can
trigger this, too.
- Hitting some functionality that is actually designed to behave randomly.
Generally harmless. For example, when fuzzing sqlite, an input like
'select random();' will trigger a variable execution path.
- Multiple threads executing at once in semi-random order. This is harmless
when the 'stability' metric stays over 90% or so, but can become an issue
if not. Here's what to try:
- Use afl-clang-fast from llvm_mode/ - it uses a thread-local tracking
model that is less prone to concurrency issues,
- See if the target can be compiled or run without threads. Common
./configure options include --without-threads, --disable-pthreads, or
--disable-openmp.
- Replace pthreads with GNU Pth (https://www.gnu.org/software/pth/), which
allows you to use a deterministic scheduler.
- In persistent mode, minor drops in the "stability" metric can be normal,
because not all the code behaves identically when re-entered; but major
dips may signify that the code within __AFL_LOOP() is not behaving
correctly on subsequent iterations (e.g., due to incomplete clean-up or
reinitialization of the state) and that most of the fuzzing effort goes
to waste.
The paths where variable behavior is detected are marked with a matching entry
in the <out_dir>/queue/.state/variable_behavior/ directory, so you can look
them up easily.
9) CPU load
-----------
[cpu: 25%]
This tiny widget shows the apparent CPU utilization on the local system. It is
calculated by taking the number of processes in the "runnable" state, and then
comparing it to the number of logical cores on the system.
If the value is shown in green, you are using fewer CPU cores than available on
your system and can probably parallelize to improve performance; for tips on
how to do that, see parallel_fuzzing.txt.
If the value is shown in red, your CPU is *possibly* oversubscribed, and
running additional fuzzers may not give you any benefits.
Of course, this benchmark is very simplistic; it tells you how many processes
are ready to run, but not how resource-hungry they may be. It also doesn't
distinguish between physical cores, logical cores, and virtualized CPUs; the
performance characteristics of each of these will differ quite a bit.
If you want a more accurate measurement, you can run the afl-gotcpu utility
from the command line.
10) Addendum: status and plot files
-----------------------------------
For unattended operation, some of the key status screen information can also be
found in a machine-readable format in the fuzzer_stats file in the output
directory. This includes:
- start_time - unix time indicating the start time of afl-fuzz
- last_update - unix time corresponding to the last update of this file
- fuzzer_pid - PID of the fuzzer process
- cycles_done - queue cycles completed so far
- execs_done - number of execve() calls attempted
- execs_per_sec - current number of execs per second
- paths_total - total number of entries in the queue
- paths_found - number of entries discovered through local fuzzing
- paths_imported - number of entries imported from other instances
- max_depth - number of levels in the generated data set
- cur_path - currently processed entry number
- pending_favs - number of favored entries still waiting to be fuzzed
- pending_total - number of all entries waiting to be fuzzed
- stability - percentage of bitmap bytes that behave consistently
- variable_paths - number of test cases showing variable behavior
- unique_crashes - number of unique crashes recorded
- unique_hangs - number of unique hangs encountered
- command_line - full command line used for the fuzzing session
- slowest_exec_ms - real time of the slowest execution in ms
- peak_rss_mb - max rss usage reached during fuzzing in mb
Most of these map directly to the UI elements discussed earlier on.
On top of that, you can also find an entry called 'plot_data', containing a
plottable history for most of these fields. If you have gnuplot installed, you
can turn this into a nice progress report with the included 'afl-plot' tool.

@ -0,0 +1,563 @@
===================================
Technical "whitepaper" for afl-fuzz
===================================
This document provides a quick overview of the guts of American Fuzzy Lop.
See README for the general instruction manual; and for a discussion of
motivations and design goals behind AFL, see historical_notes.txt.
0) Design statement
-------------------
American Fuzzy Lop does its best not to focus on any singular principle of
operation and not be a proof-of-concept for any specific theory. The tool can
be thought of as a collection of hacks that have been tested in practice,
found to be surprisingly effective, and have been implemented in the simplest,
most robust way I could think of at the time.
Many of the resulting features are made possible thanks to the availability of
lightweight instrumentation that served as a foundation for the tool, but this
mechanism should be thought of merely as a means to an end. The only true
governing principles are speed, reliability, and ease of use.
1) Coverage measurements
------------------------
The instrumentation injected into compiled programs captures branch (edge)
coverage, along with coarse branch-taken hit counts. The code injected at
branch points is essentially equivalent to:
cur_location = <COMPILE_TIME_RANDOM>;
shared_mem[cur_location ^ prev_location]++;
prev_location = cur_location >> 1;
The cur_location value is generated randomly to simplify the process of
linking complex projects and keep the XOR output distributed uniformly.
The shared_mem[] array is a 64 kB SHM region passed to the instrumented binary
by the caller. Every byte set in the output map can be thought of as a hit for
a particular (branch_src, branch_dst) tuple in the instrumented code.
The size of the map is chosen so that collisions are sporadic with almost all
of the intended targets, which usually sport between 2k and 10k discoverable
branch points:
Branch cnt | Colliding tuples | Example targets
------------+------------------+-----------------
1,000 | 0.75% | giflib, lzo
2,000 | 1.5% | zlib, tar, xz
5,000 | 3.5% | libpng, libwebp
10,000 | 7% | libxml
20,000 | 14% | sqlite
50,000 | 30% | -
At the same time, its size is small enough to allow the map to be analyzed
in a matter of microseconds on the receiving end, and to effortlessly fit
within L2 cache.
This form of coverage provides considerably more insight into the execution
path of the program than simple block coverage. In particular, it trivially
distinguishes between the following execution traces:
A -> B -> C -> D -> E (tuples: AB, BC, CD, DE)
A -> B -> D -> C -> E (tuples: AB, BD, DC, CE)
This aids the discovery of subtle fault conditions in the underlying code,
because security vulnerabilities are more often associated with unexpected
or incorrect state transitions than with merely reaching a new basic block.
The reason for the shift operation in the last line of the pseudocode shown
earlier in this section is to preserve the directionality of tuples (without
this, A ^ B would be indistinguishable from B ^ A) and to retain the identity
of tight loops (otherwise, A ^ A would be obviously equal to B ^ B).
The absence of simple saturating arithmetic opcodes on Intel CPUs means that
the hit counters can sometimes wrap around to zero. Since this is a fairly
unlikely and localized event, it's seen as an acceptable performance trade-off.
2) Detecting new behaviors
--------------------------
The fuzzer maintains a global map of tuples seen in previous executions; this
data can be rapidly compared with individual traces and updated in just a couple
of dword- or qword-wide instructions and a simple loop.
When a mutated input produces an execution trace containing new tuples, the
corresponding input file is preserved and routed for additional processing
later on (see section #3). Inputs that do not trigger new local-scale state
transitions in the execution trace (i.e., produce no new tuples) are discarded,
even if their overall control flow sequence is unique.
This approach allows for a very fine-grained and long-term exploration of
program state while not having to perform any computationally intensive and
fragile global comparisons of complex execution traces, and while avoiding the
scourge of path explosion.
To illustrate the properties of the algorithm, consider that the second trace
shown below would be considered substantially new because of the presence of
new tuples (CA, AE):
#1: A -> B -> C -> D -> E
#2: A -> B -> C -> A -> E
At the same time, with #2 processed, the following pattern will not be seen
as unique, despite having a markedly different overall execution path:
#3: A -> B -> C -> A -> B -> C -> A -> B -> C -> D -> E
In addition to detecting new tuples, the fuzzer also considers coarse tuple
hit counts. These are divided into several buckets:
1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+
To some extent, the number of buckets is an implementation artifact: it allows
an in-place mapping of an 8-bit counter generated by the instrumentation to
an 8-position bitmap relied on by the fuzzer executable to keep track of the
already-seen execution counts for each tuple.
Changes within the range of a single bucket are ignored; transition from one
bucket to another is flagged as an interesting change in program control flow,
and is routed to the evolutionary process outlined in the section below.
The hit count behavior provides a way to distinguish between potentially
interesting control flow changes, such as a block of code being executed
twice when it was normally hit only once. At the same time, it is fairly
insensitive to empirically less notable changes, such as a loop going from
47 cycles to 48. The counters also provide some degree of "accidental"
immunity against tuple collisions in dense trace maps.
The execution is policed fairly heavily through memory and execution time
limits; by default, the timeout is set at 5x the initially-calibrated
execution speed, rounded up to 20 ms. The aggressive timeouts are meant to
prevent dramatic fuzzer performance degradation by descending into tarpits
that, say, improve coverage by 1% while being 100x slower; we pragmatically
reject them and hope that the fuzzer will find a less expensive way to reach
the same code. Empirical testing strongly suggests that more generous time
limits are not worth the cost.
3) Evolving the input queue
---------------------------
Mutated test cases that produced new state transitions within the program are
added to the input queue and used as a starting point for future rounds of
fuzzing. They supplement, but do not automatically replace, existing finds.
In contrast to more greedy genetic algorithms, this approach allows the tool
to progressively explore various disjoint and possibly mutually incompatible
features of the underlying data format, as shown in this image:
http://lcamtuf.coredump.cx/afl/afl_gzip.png
Several practical examples of the results of this algorithm are discussed
here:
http://lcamtuf.blogspot.com/2014/11/pulling-jpegs-out-of-thin-air.html
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-nobody-expects-cdata-sections.html
The synthetic corpus produced by this process is essentially a compact
collection of "hmm, this does something new!" input files, and can be used to
seed any other testing processes down the line (for example, to manually
stress-test resource-intensive desktop apps).
With this approach, the queue for most targets grows to somewhere between 1k
and 10k entries; approximately 10-30% of this is attributable to the discovery
of new tuples, and the remainder is associated with changes in hit counts.
The following table compares the relative ability to discover file syntax and
explore program states when using several different approaches to guided
fuzzing. The instrumented target was GNU patch 2.7.3 compiled with -O3 and
seeded with a dummy text file; the session consisted of a single pass over the
input queue with afl-fuzz:
Fuzzer guidance | Blocks | Edges | Edge hit | Highest-coverage
strategy used | reached | reached | cnt var | test case generated
------------------+---------+---------+----------+---------------------------
(Initial file) | 156 | 163 | 1.00 | (none)
| | | |
Blind fuzzing S | 182 | 205 | 2.23 | First 2 B of RCS diff
Blind fuzzing L | 228 | 265 | 2.23 | First 4 B of -c mode diff
Block coverage | 855 | 1,130 | 1.57 | Almost-valid RCS diff
Edge coverage | 1,452 | 2,070 | 2.18 | One-chunk -c mode diff
AFL model | 1,765 | 2,597 | 4.99 | Four-chunk -c mode diff
The first entry for blind fuzzing ("S") corresponds to executing just a single
round of testing; the second set of figures ("L") shows the fuzzer running in a
loop for a number of execution cycles comparable with that of the instrumented
runs, which required more time to fully process the growing queue.
Roughly similar results have been obtained in a separate experiment where the
fuzzer was modified to compile out all the random fuzzing stages and leave just
a series of rudimentary, sequential operations such as walking bit flips.
Because this mode would be incapable of altering the size of the input file,
the sessions were seeded with a valid unified diff:
Queue extension | Blocks | Edges | Edge hit | Number of unique
strategy used | reached | reached | cnt var | crashes found
------------------+---------+---------+----------+------------------
(Initial file) | 624 | 717 | 1.00 | -
| | | |
Blind fuzzing | 1,101 | 1,409 | 1.60 | 0
Block coverage | 1,255 | 1,649 | 1.48 | 0
Edge coverage | 1,259 | 1,734 | 1.72 | 0
AFL model | 1,452 | 2,040 | 3.16 | 1
As noted earlier on, some of the prior work on genetic fuzzing relied on
maintaining a single test case and evolving it to maximize coverage. At least
in the tests described above, this "greedy" approach appears to confer no
substantial benefits over blind fuzzing strategies.
4) Culling the corpus
---------------------
The progressive state exploration approach outlined above means that some of
the test cases synthesized later on in the game may have edge coverage that
is a strict superset of the coverage provided by their ancestors.
To optimize the fuzzing effort, AFL periodically re-evaluates the queue using a
fast algorithm that selects a smaller subset of test cases that still cover
every tuple seen so far, and whose characteristics make them particularly
favorable to the tool.
The algorithm works by assigning every queue entry a score proportional to its
execution latency and file size; and then selecting lowest-scoring candidates
for each tuple.
The tuples are then processed sequentially using a simple workflow:
1) Find next tuple not yet in the temporary working set,
2) Locate the winning queue entry for this tuple,
3) Register *all* tuples present in that entry's trace in the working set,
4) Go to #1 if there are any missing tuples in the set.
The generated corpus of "favored" entries is usually 5-10x smaller than the
starting data set. Non-favored entries are not discarded, but they are skipped
with varying probabilities when encountered in the queue:
- If there are new, yet-to-be-fuzzed favorites present in the queue, 99%
of non-favored entries will be skipped to get to the favored ones.
- If there are no new favorites:
- If the current non-favored entry was fuzzed before, it will be skipped
95% of the time.
- If it hasn't gone through any fuzzing rounds yet, the odds of skipping
drop down to 75%.
Based on empirical testing, this provides a reasonable balance between queue
cycling speed and test case diversity.
Slightly more sophisticated but much slower culling can be performed on input
or output corpora with afl-cmin. This tool permanently discards the redundant
entries and produces a smaller corpus suitable for use with afl-fuzz or
external tools.
5) Trimming input files
-----------------------
File size has a dramatic impact on fuzzing performance, both because large
files make the target binary slower, and because they reduce the likelihood
that a mutation would touch important format control structures, rather than
redundant data blocks. This is discussed in more detail in perf_tips.txt.
The possibility that the user will provide a low-quality starting corpus aside,
some types of mutations can have the effect of iteratively increasing the size
of the generated files, so it is important to counter this trend.
Luckily, the instrumentation feedback provides a simple way to automatically
trim down input files while ensuring that the changes made to the files have no
impact on the execution path.
The built-in trimmer in afl-fuzz attempts to sequentially remove blocks of data
with variable length and stepover; any deletion that doesn't affect the checksum
of the trace map is committed to disk. The trimmer is not designed to be
particularly thorough; instead, it tries to strike a balance between precision
and the number of execve() calls spent on the process, selecting the block size
and stepover to match. The average per-file gains are around 5-20%.
The standalone afl-tmin tool uses a more exhaustive, iterative algorithm, and
also attempts to perform alphabet normalization on the trimmed files. The
operation of afl-tmin is as follows.
First, the tool automatically selects the operating mode. If the initial input
crashes the target binary, afl-tmin will run in non-instrumented mode, simply
keeping any tweaks that produce a simpler file but still crash the target. If
the target is non-crashing, the tool uses an instrumented mode and keeps only
the tweaks that produce exactly the same execution path.
The actual minimization algorithm is:
1) Attempt to zero large blocks of data with large stepovers. Empirically,
this is shown to reduce the number of execs by preempting finer-grained
efforts later on.
2) Perform a block deletion pass with decreasing block sizes and stepovers,
binary-search-style.
3) Perform alphabet normalization by counting unique characters and trying
to bulk-replace each with a zero value.
4) As a last resort, perform byte-by-byte normalization on non-zero bytes.
Instead of zeroing with a 0x00 byte, afl-tmin uses the ASCII digit '0'. This
is done because such a modification is much less likely to interfere with
text parsing, so it is more likely to result in successful minimization of
text files.
The algorithm used here is less involved than some other test case
minimization approaches proposed in academic work, but requires far fewer
executions and tends to produce comparable results in most real-world
applications.
6) Fuzzing strategies
---------------------
The feedback provided by the instrumentation makes it easy to understand the
value of various fuzzing strategies and optimize their parameters so that they
work equally well across a wide range of file types. The strategies used by
afl-fuzz are generally format-agnostic and are discussed in more detail here:
http://lcamtuf.blogspot.com/2014/08/binary-fuzzing-strategies-what-works.html
It is somewhat notable that especially early on, most of the work done by
afl-fuzz is actually highly deterministic, and progresses to random stacked
modifications and test case splicing only at a later stage. The deterministic
strategies include:
- Sequential bit flips with varying lengths and stepovers,
- Sequential addition and subtraction of small integers,
- Sequential insertion of known interesting integers (0, 1, INT_MAX, etc.).
The purpose of opening with deterministic steps is related to their tendency to
produce compact test cases and small diffs between the non-crashing and crashing
inputs.
With deterministic fuzzing out of the way, the non-deterministic steps include
stacked bit flips, insertions, deletions, arithmetics, and splicing of different
test cases.
The relative yields and execve() costs of all these strategies have been
investigated and are discussed in the aforementioned blog post.
For the reasons discussed in historical_notes.txt (chiefly, performance,
simplicity, and reliability), AFL generally does not try to reason about the
relationship between specific mutations and program states; the fuzzing steps
are nominally blind, and are guided only by the evolutionary design of the
input queue.
That said, there is one (trivial) exception to this rule: when a new queue
entry goes through the initial set of deterministic fuzzing steps, and tweaks to
some regions in the file are observed to have no effect on the checksum of the
execution path, they may be excluded from the remaining phases of
deterministic fuzzing - and the fuzzer may proceed straight to random tweaks.
Especially for verbose, human-readable data formats, this can reduce the number
of execs by 10-40% or so without an appreciable drop in coverage. In extreme
cases, such as normally block-aligned tar archives, the gains can be as high as
90%.
Because the underlying "effector maps" are local to every queue entry and remain
in force only during deterministic stages that do not alter the size or the
general layout of the underlying file, this mechanism appears to work very
reliably and proved to be simple to implement.
7) Dictionaries
---------------
The feedback provided by the instrumentation makes it easy to automatically
identify syntax tokens in some types of input files, and to detect that certain
combinations of predefined or auto-detected dictionary terms constitute a
valid grammar for the tested parser.
A discussion of how these features are implemented within afl-fuzz can be found
here:
http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html
In essence, when basic, typically easily-obtained syntax tokens are combined
together in a purely random manner, the instrumentation and the evolutionary
design of the queue together provide a feedback mechanism to differentiate
between meaningless mutations and ones that trigger new behaviors in the
instrumented code - and to incrementally build more complex syntax on top of
this discovery.
The dictionaries have been shown to enable the fuzzer to rapidly reconstruct
the grammar of highly verbose and complex languages such as JavaScript, SQL,
or XML; several examples of generated SQL statements are given in the blog
post mentioned above.
Interestingly, the AFL instrumentation also allows the fuzzer to automatically
isolate syntax tokens already present in an input file. It can do so by looking
for runs of bytes that, when flipped, produce a consistent change to the
program's execution path; this is suggestive of an underlying atomic comparison
to a predefined value baked into the code. The fuzzer relies on this signal
to build compact "auto dictionaries" that are then used in conjunction with
other fuzzing strategies.
8) De-duping crashes
--------------------
De-duplication of crashes is one of the more important problems for any
competent fuzzing tool. Many of the naive approaches run into problems; in
particular, looking just at the faulting address may lead to completely
unrelated issues being clustered together if the fault happens in a common
library function (say, strcmp, strcpy); while checksumming call stack
backtraces can lead to extreme crash count inflation if the fault can be
reached through a number of different, possibly recursive code paths.
The solution implemented in afl-fuzz considers a crash unique if either of two
conditions is met:
- The crash trace includes a tuple not seen in any of the previous crashes,
- The crash trace is missing a tuple that was always present in earlier
faults.
The approach is vulnerable to some path count inflation early on, but exhibits
a very strong self-limiting effect, similar to the execution path analysis
logic that is the cornerstone of afl-fuzz.
9) Investigating crashes
------------------------
The exploitability of many types of crashes can be ambiguous; afl-fuzz tries
to address this by providing a crash exploration mode where a known-faulting
test case is fuzzed in a manner very similar to the normal operation of the
fuzzer, but with a constraint that causes any non-crashing mutations to be
thrown away.
A detailed discussion of the value of this approach can be found here:
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-crash-exploration-mode.html
The method uses instrumentation feedback to explore the state of the crashing
program to get past the ambiguous faulting condition and then isolate the
newly-found inputs for human review.
On the subject of crashes, it is worth noting that in contrast to normal
queue entries, crashing inputs are *not* trimmed; they are kept exactly as
discovered to make it easier to compare them to the parent, non-crashing entry
in the queue. That said, afl-tmin can be used to shrink them at will.
10) The fork server
-------------------
To improve performance, afl-fuzz uses a "fork server", where the fuzzed process
goes through execve(), linking, and libc initialization only once, and is then
cloned from a stopped process image by leveraging copy-on-write. The
implementation is described in more detail here:
http://lcamtuf.blogspot.com/2014/10/fuzzing-binaries-without-execve.html
The fork server is an integral aspect of the injected instrumentation and
simply stops at the first instrumented function to await commands from
afl-fuzz.
With fast targets, the fork server can offer considerable performance gains,
usually between 1.5x and 2x. It is also possible to:
- Use the fork server in manual ("deferred") mode, skipping over larger,
user-selected chunks of initialization code. It requires very modest
code changes to the targeted program, and with some targets, can
produce 10x+ performance gains.
- Enable "persistent" mode, where a single process is used to try out
multiple inputs, greatly limiting the overhead of repetitive fork()
calls. This generally requires some code changes to the targeted program,
but can improve the performance of fast targets by a factor of 5 or more
- approximating the benefits of in-process fuzzing jobs while still
maintaining very robust isolation between the fuzzer process and the
targeted binary.
11) Parallelization
-------------------
The parallelization mechanism relies on periodically examining the queues
produced by independently-running instances on other CPU cores or on remote
machines, and then selectively pulling in the test cases that, when tried
out locally, produce behaviors not yet seen by the fuzzer at hand.
This allows for extreme flexibility in fuzzer setup, including running synced
instances against different parsers of a common data format, often with
synergistic effects.
For more information about this design, see parallel_fuzzing.txt.
12) Binary-only instrumentation
-------------------------------
Instrumentation of black-box, binary-only targets is accomplished with the
help of a separately-built version of QEMU in "user emulation" mode. This also
allows the execution of cross-architecture code - say, ARM binaries on x86.
QEMU uses basic blocks as translation units; the instrumentation is implemented
on top of this and uses a model roughly analogous to the compile-time hooks:
if (block_address > elf_text_start && block_address < elf_text_end) {
cur_location = (block_address >> 4) ^ (block_address << 8);
shared_mem[cur_location ^ prev_location]++;
prev_location = cur_location >> 1;
}
The shift-and-XOR-based scrambling in the second line is used to mask the
effects of instruction alignment.
The start-up of binary translators such as QEMU, DynamoRIO, and PIN is fairly
slow; to counter this, the QEMU mode leverages a fork server similar to that
used for compiler-instrumented code, effectively spawning copies of an
already-initialized process paused at _start.
First-time translation of a new basic block also incurs substantial latency. To
eliminate this problem, the AFL fork server is extended by providing a channel
between the running emulator and the parent process. The channel is used
to notify the parent about the addresses of any newly-encountered blocks and to
add them to the translation cache that will be replicated for future child
processes.
As a result of these two optimizations, the overhead of the QEMU mode is
roughly 2-5x, compared to 100x+ for PIN.
13) The afl-analyze tool
------------------------
The file format analyzer is a simple extension of the minimization algorithm
discussed earlier on; instead of attempting to remove no-op blocks, the tool
performs a series of walking byte flips and then annotates runs of bytes
in the input file.
It uses the following classification scheme:
- "No-op blocks" - segments where bit flips cause no apparent changes to
control flow. Common examples may be comment sections, pixel data within
a bitmap file, etc.
- "Superficial content" - segments where some, but not all, bit flips
produce some control flow changes. Examples may include strings in rich
documents (e.g., XML, RTF).
- "Critical stream" - a sequence of bytes where all bit flips alter control
flow in different but correlated ways. This may be compressed data,
non-atomically compared keywords or magic values, etc.
- "Suspected length field" - small, atomic integer that, when touched in
any way, causes a consistent change to program control flow, suggestive
of a failed length check.
- "Suspected cksum or magic int" - an integer that behaves similarly to a
length field, but has a numerical value that makes the length explanation
unlikely. This is suggestive of a checksum or other "magic" integer.
- "Suspected checksummed block" - a long block of data where any change
always triggers the same new execution path. Likely caused by failing
a checksum or a similar integrity check before any subsequent parsing
takes place.
- "Magic value section" - a generic token where changes cause the type
of binary behavior outlined earlier, but that doesn't meet any of the
other criteria. May be an atomically compared keyword or so.

Binary file not shown.

After

Width:  |  Height:  |  Size: 581 KiB

@ -0,0 +1 @@
() { _; } >_[$($())] { id; }

@ -0,0 +1 @@
() { x() { _; }; x() { _; } <<a; }

Binary file not shown.

After

Width:  |  Height:  |  Size: 892 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 179 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 642 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 595 B

@ -0,0 +1,3 @@
<!DOCTYPEd[<!ENTITY
S ""><!ENTITY %
N "<!ELEMENT<![INCLUDE0"<!ENTITYL%N;

Binary file not shown.

After

Width:  |  Height:  |  Size: 876 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 293 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 434 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 996 B

@ -0,0 +1,2 @@
create table t0(o CHar(0)CHECK(0&O>O));insert into t0
select randomblob(0)-trim(0);

@ -0,0 +1 @@
SELECT 0 UNION SELECT 0 ORDER BY 1 COLLATE"""""""";

@ -0,0 +1 @@
PRAGMA foreign_keys=1;CREATE TABLE t1("""0"PRIMARY KEy REFERENCES t1 ON DELETE SET NULL);REPLACE INTO t1 SELECT(0);

@ -0,0 +1,2 @@
DROP TABLE IF EXISTS t;CREATE VIRTUAL TABLE t0 USING fts4();insert into t0 select zeroblob(0);SAVEPOINT O;insert into t0
select(0);SAVEPOINT E;insert into t0 SELECT 0 UNION SELECT 0'x'ORDER BY x;

File diff suppressed because one or more lines are too long

@ -0,0 +1 @@
SELECT*from(select"",zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(150000000),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0)),(select"",zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),(0),zeroblob(150000000),(0),zeroblob(0),(0)EXCEPT select zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0));

@ -0,0 +1,2 @@
create table t0(t);insert into t0
select strftime();

@ -0,0 +1 @@
SELECT fts3_tokenizer(@0());

@ -0,0 +1 @@
select''like''like''like#0;

@ -0,0 +1 @@
PRAGMA e;select lower(0);select lower(0)"a",""GROUP BY a ORDER BY a;

@ -0,0 +1 @@
WITH x AS(SELECT*FROM t)SELECT""EXCEPT SELECT 0 ORDER BY 0 COLLATE"";

@ -0,0 +1 @@
CREATE VIRTUAL TABLE x USING fts4();VALUES(0,0),(0,0),(0,0),(0,0);PRAGMA writable_schema=ON;UPDATE sqlite_master SET sql=''WHERE name='';UPDATE sqlite_master SET sql='CREATE table t(d CHECK(T(#0)';SAVEPOINT K;SAVEPOINT T;SAVEPOINT T;ANALYZE;ROLLBACK;SAVEPOINT E;DROP TABLE IF EXISTS t;

@ -0,0 +1 @@
CREATE VIRTUAL TABLE t4 USING fts4(0,b,c,notindexed=0);INSERT INTO t4 VALUES('','','0');BEGIN;INSERT INTO t4 VALUES('','','0');INSERT INTO t4(t4)VALUES('integrity-check');

@ -0,0 +1 @@
DETACH(select group_concat(q));

@ -0,0 +1 @@
select(select strftime());

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save