Compare commits

..

4 Commits

Author SHA1 Message Date
“18670363079” 1d189325cd 1.2
2 months ago
“18670363079” eabae9582c 1
2 months ago
“18670363079” 1004ce885a 12.16
2 months ago
“18670363079” 42a166966e 这下交对了吧
4 months ago

@ -1,18 +0,0 @@
{
"configurations": [
{
"name": "windows-gcc-x86",
"includePath": [
"${workspaceFolder}/**"
],
"compilerPath": "C:/Program Files/MinGW/bin/gcc.exe",
"cStandard": "${default}",
"cppStandard": "${default}",
"intelliSenseMode": "windows-gcc-x86",
"compilerArgs": [
""
]
}
],
"version": 4
}

@ -13,89 +13,79 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# Program name and version.
PROGNAME    = afl
# A literal '#' held in a variable. Inside a function call GNU Make < 4.3
# needs the '#' escaped, while >= 4.3 hands the escaping backslash straight to
# grep; expanding a variable instead behaves the same on both.
HASH       := \#
# Deliberately recursive (=): grep only runs when VERSION is actually expanded.
VERSION     = $(shell grep '^$(HASH)define VERSION ' config.h | cut -d '"' -f2)

# Installation paths, all derived from PREFIX (overridable by the caller).
PREFIX     ?= /usr/local
BIN_PATH    = $(PREFIX)/bin
HELPER_PATH = $(PREFIX)/lib/afl
DOC_PATH    = $(PREFIX)/share/doc/afl
MISC_PATH   = $(PREFIX)/share/afl

# Compiled programs and shell-script helpers.
# PROGS intentionally omit afl-as, which gets installed elsewhere.
PROGS       = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
SH_PROGS    = afl-plot afl-cmin afl-whatsup

# Compiler flags. The optimisation defaults apply only when CFLAGS is not
# already set; the remaining flags bake the install paths into the binaries
# as the AFL_PATH, DOC_PATH and BIN_PATH string macros.
CFLAGS     ?= -O3 -funroll-loops
CFLAGS     += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
              -DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \
              -DBIN_PATH=\"$(BIN_PATH)\"
# Append -ldl to the link flags when `uname` reports Linux or a GNU* system.
ifneq ($(filter Linux GNU%,$(shell uname)),)
  LDFLAGS  += -ldl
endif
# Choose the wrapper exercised by the build self-test: afl-clang when the
# output of `$(CC) --version` mentions "clang", afl-gcc in every other case
# (including when the probe prints nothing).
ifneq ($(findstring clang, $(shell $(CC) --version 2>/dev/null)),)
  TEST_CC   = afl-clang
else
  TEST_CC   = afl-gcc
endif
# Headers shared by the tools; listed as a prerequisite of each tool rule.
COMM_HDR    = alloc-inl.h config.h debug.h types.h

# These targets name actions rather than files. Declaring them phony keeps a
# stray file called e.g. "all" or "clean" from silently masking the rule.
.PHONY: all test_x86 test_build all_done clean install publish

# Default target: check the compiler, build every tool, then run the self-test.
all: test_x86 $(PROGS) afl-as test_build all_done
# Check that $(CC) can build a program containing x86 inline assembly.
# Setting AFL_NO_X86 replaces the check with a note that it was skipped.
#
# The probe program spells out `int main(void)` and returns a value: compilers
# that treat implicit int as a hard error would otherwise reject the probe
# itself (-w silences warnings, not errors) and the check would fail for a
# reason that has nothing to do with x86 support.
ifndef AFL_NO_X86

test_x86:
	@echo "[*] Checking for the ability to compile x86 code..."
	@echo 'int main(void) { __asm__("xorb %al, %al"); return 0; }' | $(CC) -w -x c - -o .test || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
	@rm -f .test
	@echo "[+] Everything seems to be working, ready to compile."

else

test_x86:
	@echo "[!] Note: skipping x86 compilation checks (AFL_NO_X86 set)."

endif
# Build the afl-gcc wrapper, then point the afl-g++, afl-clang and
# afl-clang++ names at it via symlinks. test_x86 is order-only: it must run
# first but never forces a rebuild.
afl-gcc: afl-gcc.c $(COMM_HDR) | test_x86
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
	set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf $@ $$i; done
# Build afl-as and make it reachable under the name "as" through a symlink.
afl-as: afl-as.c afl-as.h $(COMM_HDR) | test_x86
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
	ln -sf $@ as
# afl-fuzz, afl-showmap, afl-tmin, afl-analyze and afl-gotcpu are each compiled
# from the single source file of the same name with the same command, so one
# static pattern rule covers all five. test_x86 is an order-only prerequisite.
afl-fuzz afl-showmap afl-tmin afl-analyze afl-gotcpu: %: %.c $(COMM_HDR) | test_x86
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
# Build self-test: check that the instrumentation works
ifndef AFL_NO_X86
test_build: afl-gcc afl-as afl-showmap
@echo "[*] Testing the CC wrapper and instrumentation output..."
unset AFL_USE_ASAN AFL_USE_MSAN; AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. ./$(TEST_CC) $(CFLAGS) test-instr.c -o test-instr $(LDFLAGS)
@ -104,20 +94,22 @@ test_build: afl-gcc afl-as afl-showmap
@rm -f test-instr
@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please ping <lcamtuf@google.com> to troubleshoot the issue."; echo; exit 1; fi
@echo "[+] All right, the instrumentation seems to be working!"
else
test_build: afl-gcc afl-as afl-showmap
@echo "[!] Note: skipping build tests (you may need to use LLVM or QEMU mode)."
endif
# Last step of the default build: once test_build has run, print the closing
# hints (LLVM mode pointer, macOS fork() warning, terminal-background note).
all_done: test_build
	@if [ -n "$$(which clang 2>/dev/null)" ]; then echo "[+] LLVM users: see llvm_mode/README.llvm for a faster alternative to afl-gcc."; fi
	@echo "[+] All done! Be sure to review README - it's pretty short and useful."
	@if [ "$$(uname)" = "Darwin" ]; then printf "\nWARNING: Fuzzing on MacOS X is slow because of the unusually high overhead of\nfork() on this OS. Consider using Linux or *BSD. You can also use VirtualBox\n(virtualbox.org) to put AFL inside a Linux or *BSD VM.\n\n"; fi
	@! tty <&1 >/dev/null || printf "\033[0;30mNOTE: If you can read this, your terminal probably uses white background.\nThis will make the UI hard to read. See docs/status_screen.txt for advice.\033[0m\n" 2>/dev/null
# Remove the files generated by the build
.NOTPARALLEL: clean
clean:
rm -f $(PROGS) afl-as as afl-g++ afl-clang afl-clang++ *.o *~ a.out core core.[1-9][0-9]* *.stackdump test .test test-instr .test-instr0 .test-instr1 qemu_mode/qemu-2.10.0.tar.bz2 afl-qemu-trace
rm -rf out_dir qemu_mode/qemu-2.10.0
@ -125,7 +117,6 @@ clean:
$(MAKE) -C libdislocator clean
$(MAKE) -C libtokencap clean
# Install the programs
install: all
mkdir -p -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH)
rm -f $${DESTDIR}$(BIN_PATH)/afl-plot.sh
@ -137,4 +128,26 @@ ifndef AFL_TRACE_PC
else
if [ -f afl-clang-fast -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
endif
@echo "[+] All done!"
if [ -f afl-llvm-rt-32.o ]; then set -e; install -m 755 afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi
if [ -f afl-llvm-rt-64.o ]; then set -e; install -m 755 afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi
set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/$$i; done
install -m 755 afl-as $${DESTDIR}$(HELPER_PATH)
ln -sf afl-as $${DESTDIR}$(HELPER_PATH)/as
install -m 644 README.md docs/ChangeLog docs/*.txt $${DESTDIR}$(DOC_PATH)
cp -r testcases/ $${DESTDIR}$(MISC_PATH)
cp -r dictionaries/ $${DESTDIR}$(MISC_PATH)
# Maintainer-only release target: refuses to run outside a directory named
# "AFL" or when a tarball for the current VERSION already exists, then packs
# the tree as $(PROGNAME)-$(VERSION).tgz and refreshes the docs under ~/www/afl.
#
# The tree is copied under its real directory name (captured before `cd ..`)
# rather than $(PROGNAME): the guard above demands "AFL", which is not the same
# path as "afl" on a case-sensitive filesystem. The packaging steps are chained
# with && so a failed copy stops the release instead of reaching tar.
# NOTE(review): the install rule ships README.md while this rule reads
# docs/README - confirm that file still exists.
publish: clean
	test "`basename $$PWD`" = "AFL" || exit 1
	test -f ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz; if [ "$$?" = "0" ]; then echo; echo "Change program version in config.h, mmkay?"; echo; exit 1; fi
	src="$$(basename "$$PWD")" && cd .. && rm -rf $(PROGNAME)-$(VERSION) && \
	  cp -pr "$$src" $(PROGNAME)-$(VERSION) && \
	  tar -cvz -f ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz $(PROGNAME)-$(VERSION)
	chmod 644 ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz
	( cd ~/www/afl/releases/ && ln -s -f $(PROGNAME)-$(VERSION).tgz $(PROGNAME)-latest.tgz )
	cat docs/README >~/www/afl/README.txt
	cat docs/status_screen.txt >~/www/afl/status_screen.txt
	cat docs/historical_notes.txt >~/www/afl/historical_notes.txt
	cat docs/technical_details.txt >~/www/afl/technical_details.txt
	cat docs/ChangeLog >~/www/afl/ChangeLog.txt
	cat docs/QuickStartGuide.txt >~/www/afl/QuickStartGuide.txt
	printf '%s' "$(VERSION)" >~/www/afl/version.txt

@ -1,56 +1,56 @@
# american fuzzy lop
[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL) # code
[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL)
Originally developed by Michal Zalewski <lcamtuf@google.com>.
See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read
this file.
Originally developed by Michal Zalewski <lcamtuf@google.com>. # code
``````
See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read # code
this file. ``````
# code
## 1) Challenges of guided fuzzing
Fuzzing is one of the most powerful and proven strategies for identifying
security issues in real-world software; it is responsible for the vast
majority of remote code execution and privilege escalation bugs found to date # code
majority of remote code execution and privilege escalation bugs found to date
in security-critical software.
Unfortunately, fuzzing is also relatively shallow; blind, random mutations # code
Unfortunately, fuzzing is also relatively shallow; blind, random mutations
make it very unlikely to reach certain code paths in the tested code, leaving
some vulnerabilities firmly outside the reach of this technique.
# code
There have been numerous attempts to solve this problem. One of the early
approaches - pioneered by Tavis Ormandy - is corpus distillation. The method
relies on coverage signals to select a subset of interesting seeds from a ``````
relies on coverage signals to select a subset of interesting seeds from a
massive, high-quality corpus of candidate files, and then fuzz them by
traditional means. The approach works exceptionally well, but requires such
a corpus to be readily available. In addition, block coverage measurements
provide only a very simplistic understanding of program state, and are less
useful for guiding the fuzzing effort in the long haul. # code
# code
useful for guiding the fuzzing effort in the long haul.
Other, more sophisticated research has focused on techniques such as program
flow analysis ("concolic execution"), symbolic execution, or static analysis.
All these methods are extremely promising in experimental settings, but tend
to suffer from reliability and performance problems in practical uses - and
currently do not offer a viable alternative to "dumb" fuzzing techniques.
## 2) The afl-fuzz approach # code
## 2) The afl-fuzz approach
American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
but rock-solid instrumentation-guided genetic algorithm. It uses a modified
form of edge coverage to effortlessly pick up subtle, local-scale changes to
program control flow. ``````
# code
program control flow.
Simplifying a bit, the overall algorithm can be summed up as:
1) Load user-supplied initial test cases into the queue,
2) Take next input file from the queue,
3) Attempt to trim the test case to the smallest size that doesn't alter # code
3) Attempt to trim the test case to the smallest size that doesn't alter
the measured behavior of the program,
4) Repeatedly mutate the file using a balanced and well-researched variety
of traditional fuzzing strategies, # code
of traditional fuzzing strategies,
5) If any of the generated mutations resulted in a new state transition
recorded by the instrumentation, add mutated output as a new entry in the
@ -58,9 +58,9 @@ Simplifying a bit, the overall algorithm can be summed up as:
6) Go to 2.
The discovered test cases are also periodically culled to eliminate ones that ``````
The discovered test cases are also periodically culled to eliminate ones that
have been obsoleted by newer, higher-coverage finds; and undergo several other
instrumentation-driven effort minimization steps. # code
instrumentation-driven effort minimization steps.
As a side result of the fuzzing process, the tool creates a small,
self-contained corpus of interesting test cases. These are extremely useful
@ -69,10 +69,10 @@ for stress-testing browsers, office applications, graphics suites, or
closed-source tools.
The fuzzer is thoroughly tested to deliver out-of-the-box performance far
superior to blind fuzzing or coverage-only tools. # code
superior to blind fuzzing or coverage-only tools.
## 3) Instrumenting programs for use with AFL
# code
When source code is available, instrumentation can be injected by a companion
tool that works as a drop-in replacement for gcc or clang in any standard build
process for third-party code.
@ -89,28 +89,28 @@ $ CC=/path/to/afl/afl-gcc ./configure
$ make clean all
```
For C++ programs, you would also want to set `CXX=/path/to/afl/afl-g++`. # code
For C++ programs, you would also want to set `CXX=/path/to/afl/afl-g++`.
The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
clang users may also opt to leverage a higher-performance instrumentation mode, ``````
clang users may also opt to leverage a higher-performance instrumentation mode,
as described in llvm_mode/README.llvm.
``````
When testing libraries, you need to find or write a simple program that reads
data from stdin or from a file and passes it to the tested library. In such a # code
data from stdin or from a file and passes it to the tested library. In such a
case, it is essential to link this executable against a static version of the
instrumented library, or to make sure that the correct .so file is loaded at
runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static # code
runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
build, usually possible via:
# code
```shell
$ CC=/path/to/afl/afl-gcc ./configure --disable-shared
```
Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
automatically enable code hardening options that make it easier to detect
simple memory bugs. Libdislocator, a helper library included with AFL (see # code
libdislocator/README.dislocator) can help uncover heap corruption issues, too. # code
# code
simple memory bugs. Libdislocator, a helper library included with AFL (see
libdislocator/README.dislocator) can help uncover heap corruption issues, too.
PS. ASAN users are advised to review [notes_for_asan.txt](docs/notes_for_asan.txt) file for important
caveats.
@ -118,36 +118,36 @@ caveats.
When source code is *NOT* available, the fuzzer offers experimental support for
fast, on-the-fly instrumentation of black-box binaries. This is accomplished
with a version of QEMU running in the lesser-known "user space emulation" mode. # code
with a version of QEMU running in the lesser-known "user space emulation" mode.
QEMU is a project separate from AFL, but you can conveniently build the
feature by doing: ``````
# code
feature by doing:
```shell
$ cd qemu_mode
$ ./build_qemu_support.sh
```
# code
For additional instructions and caveats, see qemu_mode/README.qemu.
The mode is approximately 2-5x slower than compile-time instrumentation, is
less conducive to parallelization, and may have some other quirks. # code
less conducive to parallelization, and may have some other quirks.
## 5) Choosing initial test cases ``````
## 5) Choosing initial test cases
To operate correctly, the fuzzer requires one or more starting files that
contain a good example of the input data normally expected by the targeted # code
contain a good example of the input data normally expected by the targeted
application. There are two basic rules:
- Keep the files small. Under 1 kB is ideal, although not strictly necessary. # code
- Keep the files small. Under 1 kB is ideal, although not strictly necessary.
For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt).
- Use multiple test cases only if they are functionally different from
each other. There is no point in using fifty different vacation photos
to fuzz an image library.
You can find many good examples of starting files in the testcases/ subdirectory # code
that comes with this tool. ``````
You can find many good examples of starting files in the testcases/ subdirectory
that comes with this tool.
PS. If a large corpus of data is available for screening, you may want to use
the afl-cmin utility to identify a subset of functionally distinct files that
@ -155,82 +155,82 @@ exercise different code paths in the target binary.
## 6) Fuzzing binaries
The fuzzing process itself is carried out by the afl-fuzz utility. This program ``````
requires a read-only directory with initial test cases, a separate place to # code
store its findings, plus a path to the binary to test. # code
The fuzzing process itself is carried out by the afl-fuzz utility. This program
requires a read-only directory with initial test cases, a separate place to
store its findings, plus a path to the binary to test.
For target binaries that accept input directly from stdin, the usual syntax is:
For target binaries that accept input directly from stdin, the usual syntax is: ``````
``````
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
``` ``````
# code
```
For programs that take input from a file, use '@@' to mark the location in
the target's command line where the input file name should be placed. The
fuzzer will substitute this for you:
```shell # code
```shell
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
```
You can also use the -f option to have the mutated data written to a specific # code
file. This is useful if the program expects a particular file extension or so. ``````
You can also use the -f option to have the mutated data written to a specific
file. This is useful if the program expects a particular file extension or so.
Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command
line) or in a traditional, blind-fuzzer mode (specify -n).
You can use -t and -m to override the default timeout and memory limit for the
executed process; rare examples of targets that may need these settings touched
include compilers and video decoders. # code
include compilers and video decoders.
Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt).
Note that afl-fuzz starts by performing an array of deterministic fuzzing ``````
Note that afl-fuzz starts by performing an array of deterministic fuzzing
steps, which can take several days, but tend to produce neat test cases. If you
want quick & dirty results right away - akin to zzuf and other traditional
fuzzers - add the -d option to the command line. # code
``````
fuzzers - add the -d option to the command line.
## 7) Interpreting output
See the [status_screen.txt](docs/status_screen.txt) file for information on
how to interpret the displayed stats and monitor the health of the process.
Be sure to consult this file especially if any UI elements are highlighted in
red.
# code
The fuzzing process will continue until you press Ctrl-C. At minimum, you want
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
couple of hours to a week or so.
# code
There are three subdirectories created within the output directory and updated # code
There are three subdirectories created within the output directory and updated
in real time:
- queue/ - test cases for every distinctive execution path, plus all the
starting files given by the user. This is the synthesized corpus ``````
starting files given by the user. This is the synthesized corpus
mentioned in section 2.
Before using this corpus for any other purposes, you can shrink # code
Before using this corpus for any other purposes, you can shrink
it to a smaller size using the afl-cmin tool. The tool will find
a smaller subset of files offering equivalent edge coverage.
- crashes/ - unique test cases that cause the tested program to receive a
fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are # code
fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are
grouped by the received signal.
- hangs/ - unique test cases that cause the tested program to time out. The
default time limit before something is classified as a hang is
the larger of 1 second and the value of the -t parameter. # code
the larger of 1 second and the value of the -t parameter.
The value can be fine-tuned by setting AFL_HANG_TMOUT, but this
is rarely necessary. # code
is rarely necessary.
Crashes and hangs are considered "unique" if the associated execution paths
involve any state transitions not seen in previously-recorded faults. If a
single bug can be reached in multiple ways, there will be some count inflation
early in the process, but this should quickly taper off. # code
early in the process, but this should quickly taper off.
The file names for crashes and hangs are correlated with parent, non-faulting
queue entries. This should help with debugging.
When you can't reproduce a crash found by afl-fuzz, the most likely cause is # code
that you are not setting the same memory limit as used by the tool. Try: # code
When you can't reproduce a crash found by afl-fuzz, the most likely cause is
that you are not setting the same memory limit as used by the tool. Try:
```shell
$ LIMIT_MB=50
@ -243,28 +243,28 @@ also change -Sv to -Sd.
Any existing output directory can be also used to resume aborted jobs; try:
```shell
$ ./afl-fuzz -i- -o existing_output_dir [...etc...] # code
``` # code
$ ./afl-fuzz -i- -o existing_output_dir [...etc...]
```
If you have gnuplot installed, you can also generate some pretty graphs for any # code
If you have gnuplot installed, you can also generate some pretty graphs for any
active fuzzing task using afl-plot. For an example of what this looks like,
see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
## 8) Parallelized fuzzing
Every instance of afl-fuzz takes up roughly one core. This means that on
multi-core systems, parallelization is necessary to fully utilize the hardware. # code
multi-core systems, parallelization is necessary to fully utilize the hardware.
For tips on how to fuzz a common target on multiple cores or multiple networked
machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt).
The parallel fuzzing mode also offers a simple way for interfacing AFL to other
fuzzers, to symbolic or concolic execution engines, and so forth; again, see the # code
fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.
## 9) Fuzzer dictionaries
By default, afl-fuzz mutation engine is optimized for compact data formats -
say, images, multimedia, compressed data, regular expression syntax, or shell # code
say, images, multimedia, compressed data, regular expression syntax, or shell
scripts. It is somewhat less suited for languages with particularly verbose and
redundant verbiage - notably including HTML, SQL, or JavaScript.
@ -277,48 +277,48 @@ magic headers, or other special tokens associated with the targeted data type
To use this feature, you first need to create a dictionary in one of the two
formats discussed in dictionaries/README.dictionaries; and then point the fuzzer
to it via the -x option in the command line. # code
``````
to it via the -x option in the command line.
(Several common dictionaries are already provided in that subdirectory, too.)
There is no way to provide more structured descriptions of the underlying
syntax, but the fuzzer will likely figure out some of this based on the
instrumentation feedback alone. This actually works in practice, say:
[http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html) ``````
``````
PS. Even when no explicit dictionary is given, afl-fuzz will try to extract ``````
[http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html)
PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
existing syntax tokens in the input corpus by watching the instrumentation
very closely during deterministic byte flips. This works for some types of
parsers and grammars, but isn't nearly as good as the -x mode.
If a dictionary is really hard to come by, another option is to let AFL run
for a while, and then use the token capture library that comes as a companion
utility with AFL. For that, see libtokencap/README.tokencap. # code
# code
utility with AFL. For that, see libtokencap/README.tokencap.
## 10) Crash triage
The coverage-based grouping of crashes usually produces a small data set that
can be quickly triaged manually or with a very simple GDB or Valgrind script.
Every crash is also traceable to its parent non-crashing test case in the
queue, making it easier to diagnose faults. # code
queue, making it easier to diagnose faults.
Having said that, it's important to acknowledge that some fuzzing crashes can be # code
difficult to quickly evaluate for exploitability without a lot of debugging and # code
Having said that, it's important to acknowledge that some fuzzing crashes can be
difficult to quickly evaluate for exploitability without a lot of debugging and
code analysis work. To assist with this task, afl-fuzz supports a very unique
"crash exploration" mode enabled with the -C flag. # code
# code
"crash exploration" mode enabled with the -C flag.
In this mode, the fuzzer takes one or more crashing test cases as the input,
and uses its feedback-driven fuzzing strategies to very quickly enumerate all
code paths that can be reached in the program while keeping it in the
crashing state. ``````
crashing state.
Mutations that do not result in a crash are rejected; so are any changes that
do not affect the execution path. ``````
do not affect the execution path.
The output is a small corpus of files that can be very rapidly examined to see # code
what degree of control the attacker has over the faulting address, or whether # code
it is possible to get past an initial out-of-bounds read - and see what lies # code
The output is a small corpus of files that can be very rapidly examined to see
what degree of control the attacker has over the faulting address, or whether
it is possible to get past an initial out-of-bounds read - and see what lies
beneath.
Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
@ -329,19 +329,19 @@ $ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
```
The tool works with crashing and non-crashing test cases alike. In the crash
mode, it will happily accept instrumented and non-instrumented binaries. In the ``````
mode, it will happily accept instrumented and non-instrumented binaries. In the
non-crashing mode, the minimizer relies on standard AFL instrumentation to make
the file simpler without altering the execution path. # code
the file simpler without altering the execution path.
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
afl-fuzz.
# code
Another recent addition to AFL is the afl-analyze tool. It takes an input
file, attempts to sequentially flip bytes, and observes the behavior of the # code
file, attempts to sequentially flip bytes, and observes the behavior of the
tested program. It then color-codes the input based on which sections appear to
be critical, and which are not; while not bulletproof, it can often offer quick
insights into complex file formats. More info about its operation can be found
near the end of [technical_details.txt](docs/technical_details.txt). # code
near the end of [technical_details.txt](docs/technical_details.txt).
## 11) Going beyond crashes
@ -352,21 +352,21 @@ found by modifying the target programs to call abort() when, say:
- Two bignum libraries produce different outputs when given the same
fuzzer-generated input,
- An image library produces different outputs when asked to decode the same # code
- An image library produces different outputs when asked to decode the same
input image several times in a row,
- A serialization / deserialization library fails to produce stable outputs ``````
- A serialization / deserialization library fails to produce stable outputs
when iteratively serializing and deserializing fuzzer-supplied data,
- A compression library produces an output inconsistent with the input file
when asked to compress and then decompress a particular blob.
Implementing these or similar sanity checks usually takes very little time;
if you are the maintainer of a particular package, you can make this code # code
if you are the maintainer of a particular package, you can make this code
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).
## 12) Common-sense risks ``````
## 12) Common-sense risks
Please keep in mind that, similarly to many other computationally-intensive
tasks, fuzzing may put strain on your hardware and on the OS. In particular:
@ -379,19 +379,19 @@ tasks, fuzzing may put strain on your hardware and on the OS. In particular:
- Targeted programs may end up erratically grabbing gigabytes of memory or
filling up disk space with junk files. AFL tries to enforce basic memory
limits, but can't prevent each and every possible mishap. The bottom line # code
is that you shouldn't be fuzzing on systems where the prospect of data loss # code
is not an acceptable risk. # code
limits, but can't prevent each and every possible mishap. The bottom line
is that you shouldn't be fuzzing on systems where the prospect of data loss
is not an acceptable risk.
- Fuzzing involves billions of reads and writes to the filesystem. On modern
systems, this will be usually heavily cached, resulting in fairly modest # code
"physical" I/O - but there are many factors that may alter this equation. # code
systems, this will be usually heavily cached, resulting in fairly modest
"physical" I/O - but there are many factors that may alter this equation.
It is your responsibility to monitor for potential trouble; with very heavy
I/O, the lifespan of many HDDs and SSDs may be reduced.
A good way to monitor disk I/O on Linux is the 'iostat' command: # code
A good way to monitor disk I/O on Linux is the 'iostat' command:
```shell # code
```shell
$ iostat -d 3 -x -k [...optional disk ID...]
```
@ -403,34 +403,34 @@ Here are some of the most important caveats for AFL:
a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
these signals may need to have the relevant code commented out. In the same
vein, faults in child processes spawned by the fuzzed target may evade
detection unless you manually add some code to catch that. # code
detection unless you manually add some code to catch that.
- As with any other brute-force tool, the fuzzer offers limited coverage if
encryption, checksums, cryptographic signatures, or compression are used to
wholly wrap the actual data format to be tested.
To work around this, you can comment out the relevant checks (see # code
To work around this, you can comment out the relevant checks (see
experimental/libpng_no_checksum/ for inspiration); if this is not possible,
you can also write a postprocessor, as explained in # code
you can also write a postprocessor, as explained in
experimental/post_library/.
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt)
for tips. # code
for tips.
- There is no direct support for fuzzing network services, background ``````
- There is no direct support for fuzzing network services, background
daemons, or interactive apps that require UI interaction to work. You may
need to make simple code changes to make them behave in a more traditional
way. Preeny may offer a relatively simple option, too - see: ``````
way. Preeny may offer a relatively simple option, too - see:
https://github.com/zardus/preeny
Some useful tips for modifying network-based services can be also found at: # code
Some useful tips for modifying network-based services can be also found at:
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
- AFL doesn't output human-readable coverage data. If you want to monitor
coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov
# code
- Occasionally, sentient machines rise against their creators. If this ``````
- Occasionally, sentient machines rise against their creators. If this
happens to you, please consult http://lcamtuf.coredump.cx/prep/.
Beyond this, see INSTALL for platform-specific tips.
@ -438,56 +438,56 @@ Beyond this, see INSTALL for platform-specific tips.
## 14) Special thanks
Many of the improvements to afl-fuzz wouldn't be possible without feedback,
bug reports, or patches from: # code
bug reports, or patches from:
```
Jann Horn Hanno Boeck
Felix Groebert Jakub Wilk # code
Felix Groebert Jakub Wilk
Richard W. M. Jones Alexander Cherepanov
Tom Ritter Hovik Manucharyan
Sebastian Roschke Eberhard Mattes ``````
Padraig Brady Ben Laurie # code
@dronesec Luca Barbato # code
Sebastian Roschke Eberhard Mattes
Padraig Brady Ben Laurie
@dronesec Luca Barbato
Tobias Ospelt Thomas Jarosch
Martin Carpenter Mudge Zatko
Joe Zbiciak Ryan Govostes
Michael Rash William Robinet # code
Michael Rash William Robinet
Jonathan Gray Filipe Cabecinhas
Nico Weber Jodie Cunningham
Andrew Griffiths Parker Thompson
Jonathan Neuschäfer Tyler Nighswander
Ben Nagy Samir Aguiar ``````
Aidan Thornton Aleksandar Nikolich # code
Ben Nagy Samir Aguiar
Aidan Thornton Aleksandar Nikolich
Sam Hakim Laszlo Szekeres
David A. Wheeler Turo Lamminen
Andreas Stieger Richard Godbee
Louis Dassy teor2345 ``````
Louis Dassy teor2345
Alex Moneger Dmitry Vyukov
Keegan McAllister Kostya Serebryany # code
Keegan McAllister Kostya Serebryany
Richo Healey Martijn Bogaard
rc0r Jonathan Foote
Christian Holler Dominique Pelle
Jacek Wielemborek Leo Barnes # code
Jacek Wielemborek Leo Barnes
Jeremy Barnes Jeff Trull
Guillaume Endignoux ilovezfs
Daniel Godas-Lopez Franjo Ivancic # code
Austin Seipp Daniel Komaromy # code
Daniel Godas-Lopez Franjo Ivancic
Austin Seipp Daniel Komaromy
Daniel Binderman Jonathan Metzman
Vegard Nossum Jan Kneschke
Kurt Roeckx Marcel Bohme
Van-Thuan Pham Abhik Roychoudhury
Joshua J. Drake Toby Hutton ``````
Rene Freingruber Sergey Davidoff ``````
Joshua J. Drake Toby Hutton
Rene Freingruber Sergey Davidoff
Sami Liedes Craig Young
Andrzej Jackowski Daniel Hodson
```
Thank you! ``````
Thank you!
## 15) Contact
# code
Questions? Concerns? Bug reports? Please use GitHub. ``````
Questions? Concerns? Bug reports? Please use GitHub.
There is also a mailing list for the project; to join, send a mail to
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse ``````
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse
archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users).

@ -113,6 +113,7 @@ static u8 count_class_lookup[256] = {
};
//对内存中的元组计数进行分类。如果edges_only标志被设置则只标记边界情况否则使用count_class_lookup数组对每个字节的计数进行分类
static void classify_counts(u8* mem) {
u32 i = MAP_SIZE;
@ -137,7 +138,7 @@ static void classify_counts(u8* mem) {
/* See if any bytes are set in the bitmap. */
//检查在追踪位图中是否有任何字节被设置。如果有则返回1表示有变化如果没有则返回0。
static inline u8 anything_set(void) {
u32* ptr = (u32*)trace_bits;
@ -151,7 +152,7 @@ static inline u8 anything_set(void) {
/* Get rid of shared memory and temp files (atexit handler). */
//用于在程序退出时清理共享内存和临时文件。它会尝试删除程序输入文件并调用shmctl来删除共享内存标识符
static void remove_shm(void) {
unlink(prog_in); /* Ignore errors */
@ -161,7 +162,7 @@ static void remove_shm(void) {
/* Configure shared memory. */
//配置共享内存。创建一个共享内存段并将其ID设置到环境变量中以便子进程可以使用。同时将trace_bits指向共享内存的起始地址
static void setup_shm(void) {
u8* shm_str;
@ -186,7 +187,7 @@ static void setup_shm(void) {
/* Read initial file. */
//读取要分析的初始文件并将其内容存储在in_data中。同时记录文件的长度并检查文件大小是否在允许的范围内
static void read_initial_file(void) {
struct stat st;
@ -213,7 +214,7 @@ static void read_initial_file(void) {
/* Write output file. */
//将内存中的数据写入到指定的文件路径。如果文件已存在,则先删除。然后创建新文件,并将数据写入
static s32 write_to_file(u8* path, u8* mem, u32 len) {
s32 ret;
@ -234,7 +235,7 @@ static s32 write_to_file(u8* path, u8* mem, u32 len) {
/* Handle timeout signal. */
//处理超时信号。设置child_timed_out标志并尝试杀死子进程
static void handle_timeout(int sig) {
child_timed_out = 1;
@ -245,7 +246,7 @@ static void handle_timeout(int sig) {
/* Execute target application. Returns exec checksum, or 0 if program
times out. */
//执行目标程序并返回执行的校验和或者如果程序超时则返回0。这个函数负责设置环境执行程序并分析退出条件
static u32 run_target(char** argv, u8* mem, u32 len, u8 first_run) {
static struct itimerval it;
@ -370,7 +371,7 @@ static u32 run_target(char** argv, u8* mem, u32 len, u8 first_run) {
#ifdef USE_COLOR
/* Helper function to display a human-readable character. */
//显示一个可读的字符。如果字符是控制字符或非打印字符,则显示其十六进制代码
static void show_char(u8 val) {
switch (val) {
@ -386,7 +387,7 @@ static void show_char(u8 val) {
/* Show the legend */
//显示图例,解释不同颜色和标记的含义
static void show_legend(void) {
SAYF(" " cLGR bgGRA " 01 " cRST " - no-op block "
@ -403,7 +404,7 @@ static void show_legend(void) {
/* Interpret and report a pattern in the input file. */
//解释并报告输入文件中的模式。它分析文件中的每个字节,并根据其对程序执行路径的影响对其进行分类
static void dump_hex(u8* buf, u32 len, u8* b_data) {
u32 i;
@ -558,7 +559,7 @@ static void dump_hex(u8* buf, u32 len, u8* b_data) {
/* Actually analyze! */
//实际执行分析。它通过改变输入文件中的字节并观察对执行路径的影响来推断文件格式的结构
static void analyze(char** argv) {
u32 i;
@ -652,7 +653,7 @@ static void analyze(char** argv) {
/* Handle Ctrl-C and the like. */
//处理停止信号如Ctrl-C。设置stop_soon标志并尝试杀死子进程
static void handle_stop_sig(int sig) {
stop_soon = 1;
@ -663,7 +664,7 @@ static void handle_stop_sig(int sig) {
/* Do basic preparations - persistent fds, filenames, etc. */
//进行基本的准备工作,包括设置持久的文件描述符、文件名等,并配置环境变量
static void set_up_environment(void) {
u8* x;
@ -733,7 +734,7 @@ static void set_up_environment(void) {
/* Setup signal handlers, duh. */
//设置信号处理程序,以便处理停止和超时信号
static void setup_signal_handlers(void) {
struct sigaction sa;
@ -760,7 +761,7 @@ static void setup_signal_handlers(void) {
/* Detect @@ in args. */
//检测@@在参数中的位置,并替换为实际的文件路径
static void detect_file_args(char** argv) {
u32 i = 0;
@ -802,7 +803,7 @@ static void detect_file_args(char** argv) {
/* Display usage hints. */
//显示使用提示
static void usage(u8* argv0) {
SAYF("\n%s [ options ] -- /path/to/target_app [ ... ]\n\n"
@ -836,7 +837,7 @@ static void usage(u8* argv0) {
/* Find binary. */
//查找并验证二进制文件的路径
static void find_binary(u8* fname) {
u8* env_path = 0;
@ -889,7 +890,7 @@ static void find_binary(u8* fname) {
/* Fix up argv for QEMU. */
//为QEMU模式修复argv设置QEMU的参数
static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
char** new_argv = ck_alloc(sizeof(char*) * (argc + 4));
@ -952,7 +953,7 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
/* Main entry point */
//程序的主入口点。处理命令行参数,设置环境,执行分析,并输出结果
int main(int argc, char** argv) {
s32 opt;

@ -14,8 +14,6 @@
limitations under the License.
*/
// 这部分是版权声明和许可协议,声明该文件受到 Apache 2.0 许可证保护。
/*
american fuzzy lop - wrapper for GNU as
---------------------------------------
@ -38,10 +36,6 @@
*/
// 该段注释解释了代码的功能
// 它是一个用于 GNU as 汇编器的包装器
// 目的是预处理由 GCC/Clang 生成的汇编文件并注入必要的仪器代码
#define AFL_MAIN
#include "config.h"
@ -62,42 +56,19 @@
#include <sys/wait.h>
#include <sys/time.h>
// 定义宏 AFL_MAIN通常用于标识主程序或相关模块。
// 包含内部的头文件,这些文件提供了项目的配置、类型定义、调试功能以及内存分配等支持。
// 包含标准的C库和POSIX库头文件用于文件操作、内存管理、时间处理等。
static u8** as_params; /* Parameters passed to the real 'as' */
static u8* input_file; /* Originally specified input file */
static u8* modified_file; /* Instrumented file for the real 'as' */
//声明静态变量:
// as_params保存传递给实际 as 汇编器的参数。
// input_file保存输入的文件路径。
// modified_file保存经过修改的文件路径即注入了仪器代码后的文件
static u8** as_params; /* Parameters passed to the real 'as' */ //传递给as的参数数组
static u8 be_quiet, /* Quiet mode (no stderr output) */
clang_mode, /* Running in clang mode? */
pass_thru, /* Just pass data through? */
just_version, /* Just show version? */
sanitizer; /* Using ASAN / MSAN */
static u8* input_file; /* Originally specified input file */
static u8* modified_file; /* Instrumented file for the real 'as' */ //用于as的插有instrumentation的文件
// 声明其他控制程序行为的静态变量:
// be_quiet控制是否启用静默模式不输出标准错误。
// clang_mode是否处于 clang 模式。
// pass_thru是否跳过修改直接传递数据。
// just_version是否只显示版本信息。
// sanitizer是否启用了地址或内存错误检测工具如 ASAN 或 MSAN
static u32 inst_ratio = 100, /* Instrumentation probability (%) */
as_par_cnt = 1; /* Number of params to 'as' */
// 声明静态变量:
// inst_ratio仪器插入的概率百分比
// as_par_cnt传递给汇编器 as 的参数数量。
static u8 be_quiet, /* Quiet mode (no stderr output) */ //是否开启安静模式不输出到stderr
clang_mode, /* Running in clang mode? */ // 是否在clang模式下运行
pass_thru, /* Just pass data through? */ //是否只是简单地传递数据
just_version, /* Just show version? */ //是否只显示版本号
sanitizer; /* Using ASAN / MSAN */ //是否使用ASAN或MSAN
static u32 inst_ratio = 100, /* Instrumentation probability (%) */ //Instrumentation概率%
as_par_cnt = 1; /* Number of params to 'as' */ //传递给as的参数数量
/* If we don't find --32 or --64 in the command line, default to
instrumentation for whichever mode we were compiled with. This is not
@ -117,32 +88,20 @@ static u8 use_64bit = 0;
#endif /* ^WORD_SIZE_64 */
// 根据编译时的定义判断是否使用 64 位模式。
// 如果没有定义 WORD_SIZE_64则默认使用 32 位模式。
// 如果是苹果平台,还会抛出一个错误,表明不支持 32 位 Apple 平台。
/* Examine and modify parameters to pass to 'as'. Note that the file name
is always the last parameter passed by GCC, so we exploit this property
to keep the code simple. */
// The code below examines and modifies the parameters that are passed on to 'as'. It relies on the file name always being the last parameter passed by GCC.
//处理和修改传递给asGNU assembler的参数。这个函数会检查命令行参数并根据这些参数来设置全局变量如use_64bit和as_params
static void edit_params(int argc, char** argv) {
// 定义函数 edit_params用于修改传递给 as 汇编器的参数。
u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
u32 i;
// 获取环境变量 TMPDIR 和 AFL_AS并声明变量 i。
#ifdef __APPLE__
u8 use_clang_as = 0;
// 如果是在 Apple 平台上,声明一个变量 use_clang_as 来标识是否使用 clang 作为汇编器。
/* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
with the code generated by newer versions of clang that are hand-built
by the user. See the thread here: http://goo.gl/HBWDtn.
@ -163,8 +122,6 @@ static void edit_params(int argc, char** argv) {
if (!afl_as) afl_as = getenv("AFL_CXX");
if (!afl_as) afl_as = "clang";
// 如果在 clang 模式下且没有指定 AFL_AS则尝试使用 clang 作为汇编器。
}
#endif /* __APPLE__ */
@ -172,22 +129,15 @@ static void edit_params(int argc, char** argv) {
/* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
is not set. We need to check these non-standard variables to properly
handle the pass_thru logic later on. */
//检查TMPDIR、AFL_AS等环境变量确定临时目录和汇编器路径
if (!tmp_dir) tmp_dir = getenv("TEMP");
if (!tmp_dir) tmp_dir = getenv("TMP");
if (!tmp_dir) tmp_dir = "/tmp";
//如果没有设置 TMPDIR则尝试使用其他环境变量如 TEMP 和 TMP如果都没有设置则默认使用 /tmp。
as_params = ck_alloc((argc + 32) * sizeof(u8*));
// 为汇编器参数分配内存,留出额外的空间。
as_params[0] = afl_as ? afl_as : (u8*)"as";
// 设置 as 汇编器命令,如果没有设置 AFL_AS则默认使用 "as"。
as_params[argc] = 0;
for (i = 1; i < argc - 1; i++) {
@ -198,7 +148,9 @@ static void edit_params(int argc, char** argv) {
#ifdef __APPLE__
/* The Apple case is a bit different... */
//这段代码检查命令行参数中是否有 -arch并检查其后的参数。
//如果 -arch 后面是 x86_64则设置 use_64bit 为 1表示使用 64 位模式。
//如果 -arch 后面是 i386则输出错误信息并终止程序因为 32 位的 Apple 平台不被支持
if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
@ -209,27 +161,23 @@ static void edit_params(int argc, char** argv) {
/* Strip options that set the preference for a particular upstream
assembler in Xcode. */
//如果当前处于 clang 模式,并且命令行参数中有 -q 或 -Q则跳过这些参数不将它们传递给汇编器
if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
continue;
#endif /* __APPLE__ */
//将当前处理的命令行参数 argv[i] 添加到 as_params 数组中as_par_cnt 用于记录已添加的参数数量
as_params[as_par_cnt++] = argv[i];
}
// 遍历所有传递给程序的参数,如果参数为 --64 或 --32则根据平台设置 use_64bit
// 对于 Apple 平台,还会检查架构并设置为 64 位。
#ifdef __APPLE__
/* When calling clang as the upstream assembler, append -c -x assembler
and hope for the best. */
// When use_clang_as is set (clang acts as the upstream assembler on Apple platforms),
// append "-c", "-x" and "assembler" to as_params
// so that clang treats the input as an assembly file.
if (use_clang_as) {
as_params[as_par_cnt++] = "-c";
@ -238,23 +186,18 @@ static void edit_params(int argc, char** argv) {
}
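// NOTE(review): no exec_real_as function is visible in this file.
// In the visible code the real assembler is launched from main(): it fork()s,
// the child calls execvp(as_params[0], as_params), and a failed exec is reported
// through FATAL().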
#endif /* __APPLE__ */
input_file = argv[argc - 1];
//如果是 --version则设置 just_version 为 1并跳转到 wrap_things_up直接返回版本信息。
//如果输入文件以 - 开头但不是 --version则输出错误信息并终止程序。
//如果输入文件是单独的 -,则将 input_file 设置为 NULL表示从标准输入读取
input_file = argv[argc - 1];
if (input_file[0] == '-') {
if (!strcmp(input_file + 1, "-version")) {
just_version = 1;
modified_file = input_file;
goto wrap_things_up; // 跳到 wrap_things_up 标签,处理版本信息
goto wrap_things_up;
}
if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
@ -262,119 +205,134 @@ static void edit_params(int argc, char** argv) {
} else {
/* 检查输入文件路径是否看起来像是编译程序的一部分,而不是使用 gcc 编译一个临时的 .s 文件 */
/* 这段代码是为了绕过某些特殊情况,比如编译 NSS 库时的情况 */
/* Check if this looks like a standard invocation as a part of an attempt
to compile a program, rather than using gcc on an ad-hoc .s file in
a format we may not understand. This works around an issue compiling
NSS. */
//检查输入文件是否位于临时目录(如 /tmp 或 /var/tmp
//如果输入文件不在临时目录中,则设置 pass_thru 为 1表示直接传递文件内容而不进行插桩
if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
strncmp(input_file, "/var/tmp/", 9) &&
strncmp(input_file, "/tmp/", 5)) pass_thru = 1; // 启用通过模式
strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
}
//生成一个临时文件名,用于存储插桩后的汇编文件。文件名格式为 tmp_dir/.afl-PID-TIMESTAMP.s
modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
(u32)time(NULL)); // 为生成的文件名分配内存并格式化
(u32)time(NULL));
//将生成的临时文件名添加到 as_params 数组中,作为汇编器的输入文件,并将数组末尾设置为 NULL表示参数列表结束
wrap_things_up:
as_params[as_par_cnt++] = modified_file; // 将修改后的文件名添加到参数列表
as_params[as_par_cnt] = NULL; // 参数列表以 NULL 结尾
as_params[as_par_cnt++] = modified_file;
as_params[as_par_cnt] = NULL;
}
// 此段代码注释说明了程序如何检查和处理输入文件,包括:
// 如果文件名以 - 开头,可能是传递版本信息或者其他特殊的命令行参数。
// 如果文件路径看起来是临时目录之外的路径则启用“通过模式”pass_thru这表示不进行修改直接传递给汇编器。
/* Process input file, generate modified_file. Insert instrumentation in all
the appropriate places. */
/* 处理输入文件,生成修改后的文件,并在所有适当的位置插入仪器代码 */
//处理输入文件并生成一个修改过的文件modified_file在其中插入 instrumentation仪器化代码。这个函数会读取原始的汇编文件根据配置插入instrumentation代码并将结果写入新的文件
static void add_instrumentation(void) {
static u8 line[MAX_LINE]; // 用于存储每一行的内容
//line[MAX_LINE]:用于存储从输入文件中读取的每一行。
//inf 和 outf分别表示输入文件和输出文件的指针。
//outfd输出文件的文件描述符。
//ins_lines记录插桩的行数。
static u8 line[MAX_LINE];
FILE* inf;
FILE* outf;
s32 outfd;
u32 ins_lines = 0; // 记录插入了多少行
u32 ins_lines = 0;
u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
skip_intel = 0, skip_app = 0, instrument_next = 0;
#ifdef __APPLE__
u8* colon_pos; // 用于查找冒号位置
u8* colon_pos;
#endif /* __APPLE__ */
//打开输入文件以供读取。
//如果 input_file 不为空,则打开该文件;否则从标准输入读取
if (input_file) {
inf = fopen(input_file, "r"); // 打开输入文件进行读取
if (!inf) PFATAL("Unable to read '%s'", input_file); // 如果打开失败,打印错误并退出
} else inf = stdin; // 如果没有指定输入文件,使用标准输入
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600); // 创建输出文件
inf = fopen(input_file, "r");
if (!inf) PFATAL("Unable to read '%s'", input_file);
if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file); // 如果创建失败,打印错误并退出
} else inf = stdin;
//创建并打开输出文件以供写入。
//使用 open 创建文件,并使用 fdopen 将其转换为 FILE* 类型
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
outf = fdopen(outfd, "w"); // 获取文件流
if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
if (!outf) PFATAL("fdopen() failed"); // 如果 fdopen 失败,打印错误并退出
outf = fdopen(outfd, "w");
while (fgets(line, MAX_LINE, inf)) { // 逐行读取输入文件
/* 在某些情况下,我们希望在写入仪器跳板之前先跳过某些标签、宏或注释。
*/
if (!outf) PFATAL("fdopen() failed");
//
while (fgets(line, MAX_LINE, inf)) {
/* In some cases, we want to defer writing the instrumentation trampoline
until after all the labels, macros, comments, etc. If we're in this
mode, and if the line starts with a tab followed by a character, dump
the trampoline now. */
//在适当的位置插入插桩代码。
//如果满足条件(如不在跳过模式、处于 .text 段、需要插桩等),则插入插桩代码,并增加插桩行数
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
instrument_next && line[0] == '\t' && isalpha(line[1])) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE)); // 根据使用的位数选择合适的跳板格式
R(MAP_SIZE));
instrument_next = 0;
ins_lines++; // 统计插入的行数
ins_lines++;
}
/* 输出当前行,只有在通过模式下才跳过插入仪器 */
/* Output the actual line, call it a day in pass-thru mode. */
//将当前行写入输出文件。
fputs(line, outf);
if (pass_thru) continue; // 如果启用了通过模式,则跳过剩下的处理
/* 以下代码处理实际的插桩逻辑,只在 .text 段插入仪器代码 */
//如果处于 pass_thru 模式,则跳过后续处理
if (pass_thru) continue;
/* All right, this is where the actual fun begins. For one, we only want to
instrument the .text section. So, let's keep track of that in processed
files - and let's set instr_ok accordingly. */
//检测并处理 .text 段。
//如果当前行表示 .text 段,则设置 instr_ok 为 1表示可以插桩。
//如果当前行表示其他段(如 .bss 或 .data则设置 instr_ok 为 0表示跳过插桩
if (line[0] == '\t' && line[1] == '.') {
/* OpenBSD 将跳转表直接内联到代码中,这很麻烦。它们使用特定的 p2align 指令格式,
*/
/* OpenBSD puts jump tables directly inline with the code, which is
a bit annoying. They use a specific format of p2align directives
around them, so we use that as a signal. */
if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
/* 如果检测到 .text 段或相关段,启用插桩 */
if (!strncmp(line + 2, "text\n", 5) ||
!strncmp(line + 2, "section\t.text", 13) ||
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
!strncmp(line + 2, "section __TEXT,__text", 21)) {
instr_ok = 1;
continue; // 继续处理下一行
continue;
}
/* 如果是其他段(如 bss、data禁用插桩 */
if (!strncmp(line + 2, "section\t", 8) ||
!strncmp(line + 2, "section ", 8) ||
!strncmp(line + 2, "bss\n", 4) ||
!strncmp(line + 2, "data\n", 5)) {
instr_ok = 0;
continue; // 继续处理下一行
continue;
}
}
/* 检测并跳过特定的汇编节(如 .code */
/* Detect off-flavor assembly (rare, happens in gdb). When this is
encountered, we set skip_csect until the opposite directive is
seen, and we do not instrument. */
//处理 .code 指令。
//如果检测到 .code32 或 .code64则根据当前模式设置 skip_csect跳过插桩
if (strstr(line, ".code")) {
if (strstr(line, ".code32")) skip_csect = use_64bit;
@ -382,11 +340,16 @@ static void add_instrumentation(void) {
}
/* 检测并跳过 Intel 语法块 */
/* Detect syntax changes, as could happen with hand-written assembly.
Skip Intel blocks, resume instrumentation when back to AT&T. */
//处理汇编语法变化。
//如果检测到 .intel_syntax则跳过插桩如果检测到 .att_syntax则恢复插桩
if (strstr(line, ".intel_syntax")) skip_intel = 1;
if (strstr(line, ".att_syntax")) skip_intel = 0;
/* 跳过 ad-hoc 的 __asm__ 块 */
/* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
//处理 #APP 和 #NO_APP 块。
//如果检测到 #APP则跳过插桩如果检测到 #NO_APP则恢复插桩
if (line[0] == '#' || line[1] == '#') {
if (strstr(line, "#APP")) skip_app = 1;
@ -394,20 +357,46 @@ static void add_instrumentation(void) {
}
/* 检查并插入仪器代码:主要是函数标签、条件标签等 */
/* If we're in the right mood for instrumenting, check for function
names or conditional labels. This is a bit messy, but in essence,
we want to catch:
^main: - function entry point (always instrumented)
^.L0: - GCC branch label
^.LBB0_0: - clang branch label (but only in clang mode)
^\tjnz foo - conditional branches
...but not:
^# BB#0: - clang comments
^ # BB#0: - ditto
^.Ltmp0: - clang non-branch labels
^.LC0 - GCC non-branch labels
^.LBB0_0: - ditto (when in GCC mode)
^\tjmp foo - non-conditional jumps
Additionally, clang and GCC on MacOS X follow a different convention
with no leading dots on labels, hence the weird maze of #ifdefs
later on.
*/
//处理函数标签和条件分支。
//如果当前行是条件分支指令(如 jnz则插入插桩代码
if (skip_intel || skip_app || skip_csect || !instr_ok ||
line[0] == '#' || line[0] == ' ') continue; // 跳过不需要插入的行
line[0] == '#' || line[0] == ' ') continue;
/* 条件分支指令(如 jnz 等)。我们将插入仪器代码到分支指令后面(用于记录未采取的路径) */
/* Conditional branch instruction (jnz, etc). We append the instrumentation
right after the branch (to instrument the not-taken path) and at the
branch destination label (handled later on). */
if (line[0] == '\t') {
if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE)); // 插入仪器代码
R(MAP_SIZE));
ins_lines++; // 统计插入的行数
ins_lines++;
}
@ -415,19 +404,22 @@ static void add_instrumentation(void) {
}
/* 标签的处理。标签可能是分支目标,我们需要根据格式区分处理 */
/* Label of some sort. This may be a branch destination, but we need to
tread carefully and account for several different formatting
conventions. */
#ifdef __APPLE__
/* 苹果系统标签格式L<whatever><digit>: */
/* Apple: L<whatever><digit>: */
//处理标签。
//如果当前行是标签(如 .L0: 或 LBB0_0:),则根据需要设置 instrument_next表示后续需要插桩
if ((colon_pos = strstr(line, ":"))) {
if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
#else
/* 其他平台的标签格式:.L<whatever>: */
/* Everybody else: .L<whatever>: */
if (strstr(line, ":")) {
@ -435,25 +427,34 @@ static void add_instrumentation(void) {
#endif /* __APPLE__ */
/* 处理跳转目标标签(如 .L0: 或 LBB0_0: */
/* .L0: or LBB0_0: style jump destination */
#ifdef __APPLE__
/* 苹果:L<num> / LBB<num> */
/* Apple: L<num> / LBB<num> */
if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
&& R(100) < inst_ratio) {
#else
/* 其他平台:.L<num> / .LBB<num> */
/* Everybody else: .L<num> / .LBB<num> */
if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
&& R(100) < inst_ratio) {
#endif /* __APPLE__ */
/* 如果符合条件,则插入仪器代码 */
/* An optimization is possible here by adding the code only if the
label is mentioned in the code in contexts other than call / jmp.
That said, this complicates the code by requiring two-pass
processing (messy with stdin), and results in a speed gain
typically under 10%, because compilers are generally pretty good
about not generating spurious intra-function jumps.
We use deferred output chiefly to avoid disrupting
.Lfunc_begin0-style exception handling calculations (a problem on
MacOS X). */
if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
@ -461,56 +462,63 @@ static void add_instrumentation(void) {
} else {
/* 函数标签,插入仪器代码 */
/* Function label (always instrumented, deferred mode). */
instrument_next = 1;
}
}
}
fclose(inf); // 关闭输入文件
fclose(outf); // 关闭输出文件
//在所有插桩完成后,插入主插桩代码
if (ins_lines)
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
//关闭文件并输出插桩结果。
//如果没有插桩目标,则输出警告;否则输出插桩的详细信息
if (input_file) fclose(inf);
fclose(outf);
if (!be_quiet) {
if (!ins_lines) WARNF("No instrumentation targets found%s.",
pass_thru ? " (pass-thru mode)" : "");
else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
ins_lines, use_64bit ? "64" : "32",
getenv("AFL_HARDEN") ? "hardened" :
(sanitizer ? "ASAN/MSAN" : "non-hardened"),
inst_ratio);
}
}
/* Main entry point */
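// main(), as visible here:
//  - sets clang_mode from the CLANG_ENV_VAR environment variable;
//  - prints the banner when stderr is a terminal and AFL_QUIET is unset, otherwise sets be_quiet;
//  - prints a usage message and exits with status 1 when fewer than two arguments are given;
//  - seeds the random number generator from the current time and the process ID;
//  - calls edit_params() to build the argument list for the real assembler;
//  - validates AFL_INST_RATIO (must be between 0 and 100);
//  - aborts if AS_LOOP_ENV_VAR is already set, to avoid an endless loop;
//  - divides inst_ratio by 3 when AFL_USE_ASAN or AFL_USE_MSAN is set;
//  - calls add_instrumentation() unless just_version is set;
//  - fork()s, execvp()s the real assembler in the child and waits for it with waitpid();
//  - unlinks modified_file unless AFL_KEEP_ASSEMBLY is set, then exits with the child's exit status.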
//程序的主入口点。处理命令行参数设置随机种子调用edit_params来编辑参数
//根据环境变量AFL_INST_RATIO设置instrumentation概率
//然后调用add_instrumentation来添加instrumentation代码并最终执行as
int main(int argc, char** argv) {
s32 pid;
u32 rand_seed;
int status;
u8* inst_ratio_str = getenv("AFL_INST_RATIO");
s32 pid; //用于存储 fork 后的子进程 ID
u32 rand_seed; //用于存储随机种子
int status; //用于存储子进程的退出状态
u8* inst_ratio_str = getenv("AFL_INST_RATIO"); //从环境变量 AFL_INST_RATIO 中获取插桩比例
struct timeval tv;
struct timezone tz;
//检查是否处于 clang 模式。
//如果环境变量 CLANG_ENV_VAR 存在,则设置 clang_mode 为 1否则为 0
clang_mode = !!getenv(CLANG_ENV_VAR);
//检查是否在终端运行,并输出提示信息。
//如果标准错误输出是终端且未设置 AFL_QUIET则输出程序名称和版本信息否则设置 be_quiet 为 1表示静默模式
if (isatty(2) && !getenv("AFL_QUIET")) {
SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
} else be_quiet = 1;
//检查命令行参数是否足够。
//如果参数少于 2 个,则输出帮助信息并退出程序
if (argc < 2) {
SAYF("\n"
@ -525,22 +533,27 @@ int main(int argc, char** argv) {
exit(1);
}
//生成并设置随机种子。
//使用当前时间、微秒数和进程 ID 生成随机种子,并调用 srandom 设置随机数生成器
gettimeofday(&tv, &tz);
rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
srandom(rand_seed);
//调用 edit_params 函数处理命令行参数。
//该函数会解析命令行参数并设置相关变量(如 input_file、modified_file 等)
edit_params(argc, argv);
//从环境变量 AFL_INST_RATIO 中读取插桩比例。
//如果插桩比例无效(不在 0 到 100 之间),则输出错误信息并终止程序
if (inst_ratio_str) {
if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
}
//检查并设置环境变量 AS_LOOP_ENV_VAR。
//如果该环境变量已存在,则输出错误信息并终止程序(防止无限循环)。
//否则设置该环境变量为 "1"
if (getenv(AS_LOOP_ENV_VAR))
FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
@ -549,27 +562,34 @@ int main(int argc, char** argv) {
/* When compiling with ASAN, we don't have a particularly elegant way to skip
ASAN-specific branches. But we can probabilistically compensate for
that... */
//处理 ASAN 或 MSAN 模式。
//如果启用了 ASAN 或 MSAN则设置 sanitizer 为 1并将插桩比例除以 3
if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
sanitizer = 1;
inst_ratio /= 3;
}
//调用 add_instrumentation 函数进行插桩。
//如果未设置 just_version则执行插桩操作
if (!just_version) add_instrumentation();
//创建子进程并执行 as 命令。
//使用 fork 创建子进程,并在子进程中调用 execvp 执行 as 命令。
//如果执行失败,则输出错误信息并终止程序
if (!(pid = fork())) {
execvp(as_params[0], (char**)as_params);
execvp(as_params[0], (char**)as_params);//传递给as的参数数组
FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
}
//等待子进程结束。
//如果 fork 失败,则输出错误信息并终止程序。
//使用 waitpid 等待子进程结束,并获取其退出状态
if (pid < 0) PFATAL("fork() failed");
if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
//删除临时文件。
//如果未设置 AFL_KEEP_ASSEMBLY则删除生成的临时文件
if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
//使用 WEXITSTATUS 获取子进程的退出状态,并将其作为程序的返回值
exit(WEXITSTATUS(status));
}

@ -111,7 +111,7 @@
oddball register it may touch.
*/
//32位系统的_trampoline_格式用于在函数调用前后保存和恢复寄存器状态并调用__afl_maybe_log函数
static const u8* trampoline_fmt_32 =
"\n"
@ -135,6 +135,7 @@ static const u8* trampoline_fmt_32 =
"/* --- END --- */\n"
"\n";
//64位系统的_trampoline_格式功能与32位版本类似
static const u8* trampoline_fmt_64 =
"\n"
@ -156,6 +157,7 @@ static const u8* trampoline_fmt_64 =
"/* --- END --- */\n"
"\n";
//这些是32位和64位系统的主要payload包含了AFL的instrumentation逻辑
static const u8* main_payload_32 =
"\n"
@ -167,7 +169,7 @@ static const u8* main_payload_32 =
".align 8\n"
"\n"
"__afl_maybe_log:\n"
"__afl_maybe_log:\n" //检查共享内存区域是否已经映射并调用__afl_store来记录分支信息
"\n"
" lahf\n"
" seto %al\n"
@ -178,7 +180,7 @@ static const u8* main_payload_32 =
" testl %edx, %edx\n"
" je __afl_setup\n"
"\n"
"__afl_store:\n"
"__afl_store:\n" //计算并存储指定代码位置的分支信息
"\n"
" /* Calculate and store hit for the code location specified in ecx. There\n"
" is a double-XOR way of doing this without tainting another register,\n"
@ -199,7 +201,7 @@ static const u8* main_payload_32 =
" incb (%edx, %edi, 1)\n"
#endif /* ^SKIP_COUNTS */
"\n"
"__afl_return:\n"
"__afl_return:\n" //处理返回前的标志寄存器
"\n"
" addb $127, %al\n"
" sahf\n"
@ -207,7 +209,7 @@ static const u8* main_payload_32 =
"\n"
".align 8\n"
"\n"
"__afl_setup:\n"
"__afl_setup:\n" //设置共享内存区域如果设置失败则跳转到__afl_setup_abort
"\n"
" /* Do not retry setup if we had previous failures. */\n"
"\n"
@ -249,7 +251,7 @@ static const u8* main_payload_32 =
" popl %ecx\n"
" popl %eax\n"
"\n"
"__afl_forkserver:\n"
"__afl_forkserver:\n" //进入fork服务器模式避免execve()调用的开销
"\n"
" /* Enter the fork server mode to avoid the overhead of execve() calls. */\n"
"\n"
@ -269,9 +271,9 @@ static const u8* main_payload_32 =
" addl $12, %esp\n"
"\n"
" cmpl $4, %eax\n"
" jne __afl_fork_resume\n"
" jne __afl_fork_resume\n" //在子进程中关闭文件描述符并继续执行
"\n"
"__afl_fork_wait_loop:\n"
"__afl_fork_wait_loop:\n" //等待父进程的指令如果读取失败则跳转到__afl_die
"\n"
" /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
@ -282,7 +284,7 @@ static const u8* main_payload_32 =
" addl $12, %esp\n"
"\n"
" cmpl $4, %eax\n"
" jne __afl_die\n"
" jne __afl_die\n" //终止当前进程
"\n"
" /* Once woken up, create a clone of our process. This is an excellent use\n"
" case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
@ -346,7 +348,7 @@ static const u8* main_payload_32 =
" xorl %eax, %eax\n"
" call _exit\n"
"\n"
"__afl_setup_abort:\n"
"__afl_setup_abort:\n" //记录设置失败避免重复调用shmget()/shmat()
"\n"
" /* Record setup failure so that we don't keep calling\n"
" shmget() / shmat() over and over again. */\n"
@ -356,7 +358,7 @@ static const u8* main_payload_32 =
" popl %eax\n"
" jmp __afl_return\n"
"\n"
".AFL_VARS:\n"
".AFL_VARS:\n" //定义了一系列全局变量包括共享内存区域指针、设置失败标志、前一个位置指针、fork的PID和临时变量
"\n"
" .comm __afl_area_ptr, 4, 32\n"
" .comm __afl_setup_failure, 1, 32\n"
@ -366,7 +368,7 @@ static const u8* main_payload_32 =
" .comm __afl_fork_pid, 4, 32\n"
" .comm __afl_temp, 4, 32\n"
"\n"
".AFL_SHM_ENV:\n"
".AFL_SHM_ENV:\n" //定义了共享内存环境变量的名称
" .asciz \"" SHM_ENV_VAR "\"\n"
"\n"
"/* --- END --- */\n"
@ -379,7 +381,7 @@ static const u8* main_payload_32 =
they are doing relocations differently from everybody else. We also need
to work around the crash issue with .lcomm and the fact that they don't
recognize .string. */
//由于这些平台在指令识别和库函数调用方面与其他平台不同,因此需要特定的代码来处理这些差异
#ifdef __APPLE__
# define CALL_L64(str) "call _" str "\n"
#else

@ -408,7 +408,7 @@ static void shuffle_ptrs(void** ptrs, u32 cnt) {
/* Build a list of processes bound to specific cores. Returns -1 if nothing
can be found. Assumes an upper bound of 4k CPUs. */
//绑定到空闲CPU核心。如果可能这个函数会将AFL的进程绑定到一个空闲的CPU核心上以减少上下文切换和调度延迟。
static void bind_to_free_cpu(void) {
DIR* d;
@ -722,7 +722,7 @@ static u8* DTD(u64 cur_ms, u64 event_ms) {
/* Mark deterministic checks as done for a particular queue entry. We use the
.state file to avoid repeating deterministic fuzzing when resuming aborted
scans. */
//mark_as_det_done 和 mark_as_variable共同用于标记测试用例的状态用于标记测试用例是否已经完成了确定性的测试或者是否表现出可变的行为。
static void mark_as_det_done(struct queue_entry* q) {
u8* fn = strrchr(q->fname, '/');
@ -838,7 +838,7 @@ static void add_to_queue(u8* fname, u32 len, u8 passed_det) {
/* Destroy the entire queue. */
//销毁队列的函数。它释放与测试用例队列相关的所有内存资源。
EXP_ST void destroy_queue(void) {
struct queue_entry *q = queue, *n;
@ -903,7 +903,7 @@ EXP_ST void read_bitmap(u8* fname) {
This function is called after every exec() on a fairly large buffer, so
it needs to be fast. We do this in 32-bit and 64-bit flavors. */
//检查执行路径是否带来了新的位图信息。这有助于AFL确定一个测试用例是否探索了新的代码路径。
static inline u8 has_new_bits(u8* virgin_map) {
#ifdef WORD_SIZE_64
@ -976,7 +976,7 @@ static inline u8 has_new_bits(u8* virgin_map) {
/* Count the number of bits set in the provided bitmap. Used for the status
screen several times every second, does not have to be fast. */
//计算位图中设置的位数,用于评估测试用例的覆盖率。
static u32 count_bits(u8* mem) {
u32* ptr = (u32*)mem;
@ -1011,7 +1011,7 @@ static u32 count_bits(u8* mem) {
/* Count the number of bytes set in the bitmap. Called fairly sporadically,
mostly to update the status screen or calibrate and examine confirmed
new paths. */
//计算位图中设置的字节数,用于评估测试用例的覆盖率。
static u32 count_bytes(u8* mem) {
u32* ptr = (u32*)mem;
@ -1077,7 +1077,7 @@ static const u8 simplify_lookup[256] = {
};
#ifdef WORD_SIZE_64
//简化跟踪信息,处理执行跟踪数据,以便更高效地存储和比较。
static void simplify_trace(u64* mem) {
u32 i = MAP_SIZE >> 3;
@ -1156,7 +1156,7 @@ static const u8 count_class_lookup8[256] = {
static u16 count_class_lookup16[65536];
//初始化计数分类表。这个函数用于设置一个查找表,该表用于快速分类和处理执行跟踪数据。
EXP_ST void init_count_class16(void) {
u32 b1, b2;
@ -1171,7 +1171,7 @@ EXP_ST void init_count_class16(void) {
#ifdef WORD_SIZE_64
//分类执行跟踪信息,处理执行跟踪数据,以便更高效地存储和比较。
static inline void classify_counts(u64* mem) {
u32 i = MAP_SIZE >> 3;
@ -1261,7 +1261,7 @@ static void minimize_bits(u8* dst, u8* src) {
The first step of the process is to maintain a list of top_rated[] entries
for every byte in the bitmap. We win that slot if there is no previous
contender, or if the contender has a more favorable speed x size factor. */
//更新位图分数的函数。它根据测试用例的执行时间和覆盖率来调整其在队列中的优先级。
static void update_bitmap_score(struct queue_entry* q) {
u32 i;
@ -1312,7 +1312,7 @@ static void update_bitmap_score(struct queue_entry* q) {
previously-unseen bytes (temp_v) and marks them as favored, at least
until the next run. The favored entries are given more air time during
all fuzzing steps. */
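// Queue culling: entries that cover previously-unseen bytes (temp_v) are marked as favored, at least until the next run, and favored entries are given more air time during all fuzzing steps.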
static void cull_queue(void) {
struct queue_entry* q;
@ -1367,7 +1367,7 @@ static void cull_queue(void) {
/* Configure shared memory and virgin_bits. This is called at startup. */
//设置共享内存。共享内存用于存储位图和其他状态信息,这些信息在目标程序的多个实例之间共享。
EXP_ST void setup_shm(void) {
u8* shm_str;
@ -1430,7 +1430,7 @@ static void setup_post(void) {
/* Read all testcases from the input directory, then queue them for testing.
Called at startup. */
//从输入目录中读取所有测试用例,并将它们排队以供测试。
static void read_testcases(void) {
struct dirent **nl;
@ -1687,7 +1687,7 @@ static void load_extras_file(u8* fname, u32* min_len, u32* max_len,
/* Read extras from the extras directory and sort them by size. */
//加载额外的测试用例,处理从外部源加载额外的测试用例。
static void load_extras(u8* dir) {
DIR* d;
@ -1905,7 +1905,7 @@ sort_a_extras:
/* Save automatically generated extras. */
//保存额外的测试用例,保存在模糊测试过程中自动生成的测试用例。
static void save_auto(void) {
u32 i;
@ -1933,7 +1933,7 @@ static void save_auto(void) {
/* Load automatically generated extras. */
// 用于加载自动生成的extras的函数。
static void load_auto(void) {
u32 i;
@ -1976,7 +1976,7 @@ static void load_auto(void) {
/* Destroy extras. */
// 用于销毁extras的函数。
static void destroy_extras(void) {
u32 i;
@ -2001,7 +2001,7 @@ static void destroy_extras(void) {
In essence, the instrumentation allows us to skip execve(), and just keep
cloning a stopped child. So, we just execute once, and then send commands
through a pipe. The other part of this logic is in afl-as.h. */
//初始化fork服务器的函数。在AFL中fork服务器是一种优化技术它允许AFL避免频繁地执行execve()来启动目标程序。相反,它通过克隆一个已经执行的目标程序进程来实现。
EXP_ST void init_forkserver(char** argv) {
static struct itimerval it;
@ -2286,7 +2286,7 @@ EXP_ST void init_forkserver(char** argv) {
/* Execute target application, monitoring for timeouts. Return status
information. The called program will update trace_bits[]. */
//运行目标程序并监控超时。它负责启动目标程序,等待其执行完成,并捕获任何超时或崩溃。
static u8 run_target(char** argv, u32 timeout) {
static struct itimerval it;
@ -2504,7 +2504,7 @@ static u8 run_target(char** argv, u32 timeout) {
/* Write modified data to file for testing. If out_file is set, the old file
is unlinked and a new one is created. Otherwise, out_fd is rewound and
truncated. */
//将修改后的数据写入测试用例文件。如果指定了输出文件,则会创建一个新文件;否则,会重置并截断现有的输出文件描述符。
static void write_to_testcase(void* mem, u32 len) {
s32 fd = out_fd;
@ -2561,13 +2561,13 @@ static void write_with_gap(void* mem, u32 len, u32 skip_at, u32 skip_len) {
}
//显示模糊测试的统计信息,如执行速度、发现的路径数量、唯一崩溃等。
static void show_stats(void);
/* Calibrate a new test case. This is done when processing the input directory
to warn about flaky or otherwise problematic test cases early on; and when
new paths are discovered to detect variable behavior and so on. */
//校准测试用例的函数。它执行目标程序多次,以确定测试用例的执行时间和覆盖率,这有助于确定测试用例的优先级。
static u8 calibrate_case(char** argv, struct queue_entry* q, u8* use_mem,
u32 handicap, u8 from_queue) {
@ -2722,7 +2722,7 @@ abort_calibration:
/* Examine map coverage. Called once, for first test case. */
//检查位图覆盖率。如果位图的覆盖率不足,这个函数会提醒用户可能需要重新编译目标程序以获得更好的覆盖率。
static void check_map_coverage(void) {
u32 i;
@ -2739,7 +2739,7 @@ static void check_map_coverage(void) {
/* Perform dry run of all test cases to confirm that the app is working as
expected. This is done only for the initial inputs, and only once. */
//执行干运行。在正式开始模糊测试之前,这个函数用于验证目标程序的行为,确保它能够正确处理初始的测试用例集。
static void perform_dry_run(char** argv) {
struct queue_entry* q = queue;
@ -2971,7 +2971,7 @@ static void link_or_copy(u8* old_path, u8* new_path) {
}
// 用于删除用于即时会话恢复的临时目录的函数。
static void nuke_resume_dir(void);
/* Create hard links for input test cases in the output directory, choosing
@ -3417,7 +3417,7 @@ static void find_timeout(void) {
/* Update stats file for unattended monitoring. */
// 用于更新统计文件的函数,用于无人值守的监控。
static void write_stats_file(double bitmap_cvg, double stability, double eps) {
static double last_bcvg, last_stab, last_eps;
@ -3516,7 +3516,7 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
/* Update the plot file if there is a reason to. */
// 用于更新绘图文件的函数,如果有必要的话。
static void maybe_update_plot_file(double bitmap_cvg, double eps) {
static u32 prev_qp, prev_pf, prev_pnf, prev_ce, prev_md;
@ -3556,7 +3556,7 @@ static void maybe_update_plot_file(double bitmap_cvg, double eps) {
/* A helper function for maybe_delete_out_dir(), deleting all prefixed
files in a directory. */
// 用于删除所有以特定前缀开头的文件的辅助函数。
static u8 delete_files(u8* path, u8* prefix) {
DIR* d;
@ -3587,7 +3587,7 @@ static u8 delete_files(u8* path, u8* prefix) {
/* Get the number of runnable processes, with some simple smoothing. */
// 用于获取可运行进程数量的函数,带有一定的简单平滑处理。
static double get_runnable_processes(void) {
static double res;
@ -3680,7 +3680,7 @@ dir_cleanup_failed:
/* Delete fuzzer output directory if we recognize it as ours, if the fuzzer
is not currently running, and if the last run time isn't too great. */
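// Deletes the fuzzer output directory, but only if it is recognized as ours, the fuzzer is not currently running, and the last run time is not too great.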
static void maybe_delete_out_dir(void) {
FILE* f;
@ -4646,7 +4646,7 @@ abort_trimming:
/* Write a modified test case, run program, process results. Handle
error conditions, returning 1 if it's time to bail out. This is
a helper function for fuzz_one(). */
//执行模糊测试的通用部分。它负责将变异后的测试用例写入文件,执行目标程序,并处理执行结果。
EXP_ST u8 common_fuzz_stuff(char** argv, u8* out_buf, u32 len) {
u8 fault;
@ -5000,6 +5000,7 @@ static u8 could_be_interest(u32 old_val, u32 new_val, u8 blen, u8 check_le) {
function is a tad too long... returns 0 if fuzzed successfully, 1 if
skipped or bailed out. */
// 执行一次模糊测试的函数。它接受一个测试用例,应用各种变异技术来生成新的测试用例,然后执行目标程序来检查新测试用例的效果。
static u8 fuzz_one(char** argv) {
s32 len, fd, temp_len, i, j;
@ -6691,7 +6692,7 @@ abandon_entry:
/* Grab interesting test cases from other fuzzers. */
//在分布式模糊测试中这个函数用于同步不同fuzzer实例的进度。
static void sync_fuzzers(char** argv) {
DIR* sd;
@ -6829,7 +6830,7 @@ static void sync_fuzzers(char** argv) {
/* Handle stop signal (Ctrl-C, etc). */
//处理停止信号如Ctrl+C的函数。它设置一个标志来告诉主循环停止执行。
static void handle_stop_sig(int sig) {
stop_soon = 1;
@ -6841,7 +6842,7 @@ static void handle_stop_sig(int sig) {
/* Handle skip request (SIGUSR1). */
// 用于处理用户请求跳过当前输入的信号SIGUSR1的函数。
static void handle_skipreq(int sig) {
skip_requested = 1;
@ -6849,7 +6850,7 @@ static void handle_skipreq(int sig) {
}
/* Handle timeout (SIGALRM). */
//处理超时信号的函数。它设置一个标志来指示目标程序已经超时。
static void handle_timeout(int sig) {
if (child_pid > 0) {
@ -6870,7 +6871,7 @@ static void handle_timeout(int sig) {
/* Do a PATH search and find target binary to see that it exists and
isn't a shell script - a common and painful mistake. We also check for
a valid ELF header and for evidence of AFL instrumentation. */
// Checks that the target binary exists, is not a shell script, has a valid ELF header, and shows evidence of AFL instrumentation (the technique AFL uses to trace execution paths).
EXP_ST void check_binary(u8* fname) {
u8* env_path = 0;
@ -7067,7 +7068,7 @@ static void fix_up_banner(u8* name) {
/* Check if we're on TTY. */
//检查程序是否运行在TTY上。这个函数用于确定输出是否应该是交互式的以及是否应该显示进度信息。
static void check_if_tty(void) {
struct winsize ws;
@ -7150,7 +7151,7 @@ static void usage(u8* argv0) {
/* Prepare output directories and fds. */
//准备输出目录和文件描述符。它确保所有必要的目录都存在,并设置了一些文件描述符,以便在模糊测试过程中使用。
EXP_ST void setup_dirs_fds(void) {
u8* tmp;
@ -7273,7 +7274,7 @@ EXP_ST void setup_dirs_fds(void) {
/* Setup the output file for fuzzed data, if not using -f. */
// Sets up the output file for fuzzed data when -f is not used; the file is named ".cur_input" inside out_dir.
EXP_ST void setup_stdio_file(void) {
u8* fn = alloc_printf("%s/.cur_input", out_dir);
@ -7356,7 +7357,7 @@ static void check_crash_handling(void) {
/* Check CPU governor. */
//检查CPU调速器设置的函数。确保CPU在高负载下不会降频从而影响测试的执行速度。
static void check_cpu_governor(void) {
FILE* f;
@ -7412,7 +7413,7 @@ static void check_cpu_governor(void) {
/* Count the number of logical CPU cores. */
//获取CPU核心数。这个函数用于确定系统中可用的CPU核心数以便AFL可以有效地分配工作负载。
static void get_core_count(void) {
u32 cur_runnable = 0;
@ -7541,14 +7542,14 @@ static void fix_up_sync(void) {
/* Handle screen resize (SIGWINCH). */
// Signal handler for terminal size changes. It does no work itself: it only
// sets the clear_screen flag to 1. The `sig` argument is not used.
static void handle_resize(int sig) {
clear_screen = 1;
}
/* Check ASAN options. */
// 检查ASAN选项的函数。
static void check_asan_opts(void) {
u8* x = getenv("ASAN_OPTIONS");
@ -7579,7 +7580,7 @@ static void check_asan_opts(void) {
/* Detect @@ in args. */
// 用于处理文件名中'@@'参数的函数。
EXP_ST void detect_file_args(char** argv) {
u32 i = 0;
@ -7628,7 +7629,7 @@ EXP_ST void detect_file_args(char** argv) {
/* Set up signal handlers. More complicated than it needs to be, because libc on
Solaris doesn't resume interrupted reads(), sets SA_RESETHAND when you call
siginterrupt(), and does other unnecessary things. */
// 设置信号处理程序的函数。
EXP_ST void setup_signal_handlers(void) {
struct sigaction sa;
@ -7671,7 +7672,7 @@ EXP_ST void setup_signal_handlers(void) {
/* Rewrite argv for QEMU. */
// 用于重写argv以用于QEMU的函数。
static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
char** new_argv = ck_alloc(sizeof(char*) * (argc + 4));
@ -7774,7 +7775,7 @@ static void save_cmdline(u32 argc, char** argv) {
#ifndef AFL_LIB
/* Main entry point */
//程序的主入口点。它处理命令行参数,设置信号处理程序,初始化共享内存,读取测试用例,执行干运行以验证目标程序,然后进入主循环进行模糊测试。
int main(int argc, char** argv) {
s32 opt;
@ -8194,4 +8195,4 @@ stop_fuzzing:
}
#endif /* !AFL_LIB */
#endif /* !AFL_LIB */

@ -60,7 +60,7 @@ static u8 be_quiet, /* Quiet mode */
/* Try to find our "fake" GNU assembler in AFL_PATH or at the location derived
from argv[0]. If that fails, abort. */
// 查找并设置汇编器as的路径这是AFL编译过程中需要的。
static void find_as(u8* argv0) {
u8 *afl_path = getenv("AFL_PATH");
@ -114,7 +114,7 @@ static void find_as(u8* argv0) {
/* Copy argv to cc_params, making the necessary edits. */
// 复制并编辑参数为调用实际的编译器如gcc或clang做准备
static void edit_params(u32 argc, char** argv) {
u8 fortify_set = 0, asan_set = 0;
@ -306,7 +306,7 @@ static void edit_params(u32 argc, char** argv) {
/* Main entry point */
// 程序的主入口点,处理命令行参数并调用实际的编译器。
int main(int argc, char** argv) {
if (isatty(2) && !getenv("AFL_QUIET")) {

Loading…
Cancel
Save