You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
AFL/src/afl-as.c

597 lines
22 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - wrapper for GNU as
---------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
The sole purpose of this wrapper is to preprocess assembly files generated
by GCC / clang and inject the instrumentation bits included from afl-as.h. It
is automatically invoked by the toolchain when compiling programs using
afl-gcc / afl-clang.
Note that it's an explicit non-goal to instrument hand-written assembly,
be it in separate .s files or in __asm__ blocks. The only aspiration this
utility has right now is to be able to skip them gracefully and allow the
compilation process to continue.
That said, see experimental/clang_asm_normalize/ for a solution that may
allow clang users to make things work even with hand-crafted assembly. Just
note that there is no equivalent for GCC.
*/
#define AFL_MAIN
#include "config.h"
#include "types.h"
#include "debug.h"
#include "alloc-inl.h"
#include "afl-as.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/time.h>
/* Argument vector handed to the real 'as'. */
static u8** as_params;

/* Input file as originally named on the command line. */
static u8* input_file;

/* Instrumented copy of the input that the real 'as' will consume. */
static u8* modified_file;

/* Quiet mode (no stderr output). */
static u8 be_quiet;

/* Running in clang mode? */
static u8 clang_mode;

/* Just pass data through, without instrumenting? */
static u8 pass_thru;

/* Just show the version? */
static u8 just_version;

/* Using ASAN / MSAN? */
static u8 sanitizer;

/* Instrumentation probability (%). */
static u32 inst_ratio = 100;

/* Number of entries filled in as_params so far. */
static u32 as_par_cnt = 1;
/* Default bitness of the emitted instrumentation. A --32 or --64 switch on
   the command line overrides this; without one, we go with whatever mode
   this binary itself was built for. Not perfect, but good enough for almost
   all use cases. */

#if !defined(WORD_SIZE_64) && defined(__APPLE__)
#  error "Sorry, 32-bit Apple platforms are not supported."
#endif /* !WORD_SIZE_64 && __APPLE__ */

#ifdef WORD_SIZE_64
static u8 use_64bit = 1;
#else
static u8 use_64bit = 0;
#endif /* ^WORD_SIZE_64 */
/* Examine and modify parameters to pass to 'as'. Note that the file name
is always the last parameter passed by GCC, so we exploit this property
to keep the code simple. */
//处理和修改传递给asGNU assembler的参数。这个函数会检查命令行参数并根据这些参数来设置全局变量如use_64bit和as_params
static void edit_params(int argc, char** argv) {
u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
u32 i;
#ifdef __APPLE__
u8 use_clang_as = 0;
/* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
with the code generated by newer versions of clang that are hand-built
by the user. See the thread here: http://goo.gl/HBWDtn.
To work around this, when using clang and running without AFL_AS
specified, we will actually call 'clang -c' instead of 'as -q' to
compile the assembly file.
The tools aren't cmdline-compatible, but at least for now, we can
seemingly get away with this by making only very minor tweaks. Thanks
to Nico Weber for the idea. */
if (clang_mode && !afl_as) {
use_clang_as = 1;
afl_as = getenv("AFL_CC");
if (!afl_as) afl_as = getenv("AFL_CXX");
if (!afl_as) afl_as = "clang";
}
#endif /* __APPLE__ */
/* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
is not set. We need to check these non-standard variables to properly
handle the pass_thru logic later on. */
//检查TMPDIR、AFL_AS等环境变量确定临时目录和汇编器路径
if (!tmp_dir) tmp_dir = getenv("TEMP");
if (!tmp_dir) tmp_dir = getenv("TMP");
if (!tmp_dir) tmp_dir = "/tmp";
as_params = ck_alloc((argc + 32) * sizeof(u8*));
as_params[0] = afl_as ? afl_as : (u8*)"as";
as_params[argc] = 0;
for (i = 1; i < argc - 1; i++) {
if (!strcmp(argv[i], "--64")) use_64bit = 1;
else if (!strcmp(argv[i], "--32")) use_64bit = 0;
#ifdef __APPLE__
/* The Apple case is a bit different... */
//这段代码检查命令行参数中是否有 -arch并检查其后的参数。
//如果 -arch 后面是 x86_64则设置 use_64bit 为 1表示使用 64 位模式。
//如果 -arch 后面是 i386则输出错误信息并终止程序因为 32 位的 Apple 平台不被支持
if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
else if (!strcmp(argv[i + 1], "i386"))
FATAL("Sorry, 32-bit Apple platforms are not supported.");
}
/* Strip options that set the preference for a particular upstream
assembler in Xcode. */
//如果当前处于 clang 模式,并且命令行参数中有 -q 或 -Q则跳过这些参数不将它们传递给汇编器
if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
continue;
#endif /* __APPLE__ */
//将当前处理的命令行参数 argv[i] 添加到 as_params 数组中as_par_cnt 用于记录已添加的参数数量
as_params[as_par_cnt++] = argv[i];
}
#ifdef __APPLE__
/* When calling clang as the upstream assembler, append -c -x assembler
and hope for the best. */
//如果 use_clang_as 为真(即在 macOS 下使用 clang 作为汇编器)
//则向 as_params 数组中添加 -c、-x 和 assembler 参数
//以确保 clang 正确处理汇编文件
if (use_clang_as) {
as_params[as_par_cnt++] = "-c";
as_params[as_par_cnt++] = "-x";
as_params[as_par_cnt++] = "assembler";
}
#endif /* __APPLE__ */
//如果是 --version则设置 just_version 为 1并跳转到 wrap_things_up直接返回版本信息。
//如果输入文件以 - 开头但不是 --version则输出错误信息并终止程序。
//如果输入文件是单独的 -,则将 input_file 设置为 NULL表示从标准输入读取
input_file = argv[argc - 1];
if (input_file[0] == '-') {
if (!strcmp(input_file + 1, "-version")) {
just_version = 1;
modified_file = input_file;
goto wrap_things_up;
}
if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
else input_file = NULL;
} else {
/* Check if this looks like a standard invocation as a part of an attempt
to compile a program, rather than using gcc on an ad-hoc .s file in
a format we may not understand. This works around an issue compiling
NSS. */
//检查输入文件是否位于临时目录(如 /tmp 或 /var/tmp
//如果输入文件不在临时目录中,则设置 pass_thru 为 1表示直接传递文件内容而不进行插桩
if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
strncmp(input_file, "/var/tmp/", 9) &&
strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
}
//生成一个临时文件名,用于存储插桩后的汇编文件。文件名格式为 tmp_dir/.afl-PID-TIMESTAMP.s
modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
(u32)time(NULL));
//将生成的临时文件名添加到 as_params 数组中,作为汇编器的输入文件,并将数组末尾设置为 NULL表示参数列表结束
wrap_things_up:
as_params[as_par_cnt++] = modified_file;
as_params[as_par_cnt] = NULL;
}
/* Process input file, generate modified_file. Insert instrumentation in all
the appropriate places. */
/* Reads the assembly from input_file (or from stdin when input_file is NULL),
copies every line to modified_file, and writes trampoline code ahead of
selected lines and after conditional branches. If at least one trampoline
was written, the main payload is appended to the end of the output. */
static void add_instrumentation(void) {
/* line      - buffer holding the line most recently read from the input
inf, outf - input and output streams
outfd     - file descriptor that outf is opened on
ins_lines - number of trampolines written so far */
static u8 line[MAX_LINE];
FILE* inf;
FILE* outf;
s32 outfd;
u32 ins_lines = 0;
u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
skip_intel = 0, skip_app = 0, instrument_next = 0;
#ifdef __APPLE__
u8* colon_pos;
#endif /* __APPLE__ */
/* Open the input for reading: the named file if there is one, stdin
otherwise. */
if (input_file) {
inf = fopen(input_file, "r");
if (!inf) PFATAL("Unable to read '%s'", input_file);
} else inf = stdin;
/* Create the output file. O_EXCL makes open() fail if the name already
exists; the descriptor is then wrapped in a FILE* for buffered writes. */
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
outf = fdopen(outfd, "w");
if (!outf) PFATAL("fdopen() failed");
while (fgets(line, MAX_LINE, inf)) {
/* In some cases, we want to defer writing the instrumentation trampoline
until after all the labels, macros, comments, etc. If we're in this
mode, and if the line starts with a tab followed by a character, dump
the trampoline now. */
/* This only fires when no skip flag is set, we are inside a section marked
instrumentable (instr_ok) and an earlier label requested it
(instrument_next). */
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
instrument_next && line[0] == '\t' && isalpha(line[1])) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
instrument_next = 0;
ins_lines++;
}
/* Output the actual line, call it a day in pass-thru mode. */
fputs(line, outf);
if (pass_thru) continue;
/* All right, this is where the actual fun begins. For one, we only want to
instrument the .text section. So, let's keep track of that in processed
files - and let's set instr_ok accordingly. */
/* A text-section directive sets instr_ok to 1; any other section directive,
as well as .bss and .data, sets it back to 0. */
if (line[0] == '\t' && line[1] == '.') {
/* OpenBSD puts jump tables directly inline with the code, which is
a bit annoying. They use a specific format of p2align directives
around them, so we use that as a signal. */
if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
if (!strncmp(line + 2, "text\n", 5) ||
!strncmp(line + 2, "section\t.text", 13) ||
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
!strncmp(line + 2, "section __TEXT,__text", 21)) {
instr_ok = 1;
continue;
}
if (!strncmp(line + 2, "section\t", 8) ||
!strncmp(line + 2, "section ", 8) ||
!strncmp(line + 2, "bss\n", 4) ||
!strncmp(line + 2, "data\n", 5)) {
instr_ok = 0;
continue;
}
}
/* Detect off-flavor assembly (rare, happens in gdb). When this is
encountered, we set skip_csect until the opposite directive is
seen, and we do not instrument. */
/* .code32 sets skip_csect when we are emitting 64-bit instrumentation,
.code64 sets it when we are emitting 32-bit instrumentation. */
if (strstr(line, ".code")) {
if (strstr(line, ".code32")) skip_csect = use_64bit;
if (strstr(line, ".code64")) skip_csect = !use_64bit;
}
/* Detect syntax changes, as could happen with hand-written assembly.
Skip Intel blocks, resume instrumentation when back to AT&T. */
if (strstr(line, ".intel_syntax")) skip_intel = 1;
if (strstr(line, ".att_syntax")) skip_intel = 0;
/* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
/* #APP turns skipping on and #NO_APP turns it off again. Note that "#NO_APP"
also contains the substring "#APP", so both tests match on that line and
the second assignment is the one that sticks. */
if (line[0] == '#' || line[1] == '#') {
if (strstr(line, "#APP")) skip_app = 1;
if (strstr(line, "#NO_APP")) skip_app = 0;
}
/* If we're in the right mood for instrumenting, check for function
names or conditional labels. This is a bit messy, but in essence,
we want to catch:
^main: - function entry point (always instrumented)
^.L0: - GCC branch label
^.LBB0_0: - clang branch label (but only in clang mode)
^\tjnz foo - conditional branches
...but not:
^# BB#0: - clang comments
^ # BB#0: - ditto
^.Ltmp0: - clang non-branch labels
^.LC0 - GCC non-branch labels
^.LBB0_0: - ditto (when in GCC mode)
^\tjmp foo - non-conditional jumps
Additionally, clang and GCC on MacOS X follow a different convention
with no leading dots on labels, hence the weird maze of #ifdefs
later on.
*/
/* Nothing more to do for this line if any skip flag is set, if we are
outside an instrumentable section, or if the line starts with '#' or a
space. */
if (skip_intel || skip_app || skip_csect || !instr_ok ||
line[0] == '#' || line[0] == ' ') continue;
/* Conditional branch instruction (jnz, etc). We append the instrumentation
right after the branch (to instrument the not-taken path) and at the
branch destination label (handled later on). */
/* The check is simply "tab, then 'j', then anything but 'm'". R() is defined
outside this file; R(100) < inst_ratio is presumably a random draw that
honors the configured instrumentation ratio - confirm against its
definition. */
if (line[0] == '\t') {
if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
ins_lines++;
}
continue;
}
/* Label of some sort. This may be a branch destination, but we need to
tread carefully and account for several different formatting
conventions. */
#ifdef __APPLE__
/* Apple: L<whatever><digit>: */
/* A line containing ':' is treated as a label. The two preprocessor branches
below open the same pair of braces, which are closed once after the
#endif blocks. */
if ((colon_pos = strstr(line, ":"))) {
if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
#else
/* Everybody else: .L<whatever>: */
if (strstr(line, ":")) {
if (line[0] == '.') {
#endif /* __APPLE__ */
/* .L0: or LBB0_0: style jump destination */
#ifdef __APPLE__
/* Apple: L<num> / LBB<num> */
if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
&& R(100) < inst_ratio) {
#else
/* Everybody else: .L<num> / .LBB<num> */
if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
&& R(100) < inst_ratio) {
#endif /* __APPLE__ */
/* An optimization is possible here by adding the code only if the
label is mentioned in the code in contexts other than call / jmp.
That said, this complicates the code by requiring two-pass
processing (messy with stdin), and results in a speed gain
typically under 10%, because compilers are generally pretty good
about not generating spurious intra-function jumps.
We use deferred output chiefly to avoid disrupting
.Lfunc_begin0-style exception handling calculations (a problem on
MacOS X). */
/* skip_next_label (set by the p2align check above) suppresses exactly one
label and is then cleared. */
if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
}
} else {
/* Function label (always instrumented, deferred mode). */
instrument_next = 1;
}
}
}
/* Append the main payload, but only if at least one trampoline was
written. */
if (ins_lines)
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
/* Close the streams (stdin is left open) and, unless in quiet mode, report
either the lack of targets or the number of instrumented locations. */
if (input_file) fclose(inf);
fclose(outf);
if (!be_quiet) {
if (!ins_lines) WARNF("No instrumentation targets found%s.",
pass_thru ? " (pass-thru mode)" : "");
else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
ins_lines, use_64bit ? "64" : "32",
getenv("AFL_HARDEN") ? "hardened" :
(sanitizer ? "ASAN/MSAN" : "non-hardened"),
inst_ratio);
}
}
/* Main entry point.

   Reads the environment and command line, seeds the PRNG, builds the
   assembler command line via edit_params(), writes the instrumented copy of
   the input via add_instrumentation() (skipped for --version), runs the real
   assembler in a child process, removes the temporary file, and exits with
   the assembler's exit status.

   Exits with 1 on usage errors and when the assembler did not terminate
   normally (e.g. it was killed by a signal). */

int main(int argc, char** argv) {

  s32 pid;                        /* Child PID returned by fork()       */
  u32 rand_seed;                  /* Seed handed to srandom()           */
  int status;                     /* Wait status of the child           */
  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

  struct timeval tv;

  clang_mode = !!getenv(CLANG_ENV_VAR);

  /* Print the banner only when stderr is a terminal and AFL_QUIET is not
     set; otherwise switch to quiet mode. */

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
         "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
         "don't want to run this program directly.\n\n"

         "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
         "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
         "instrumenting every discovered branch.\n\n");

    exit(1);

  }

  /* Seed the PRNG from the current time and our PID. The timezone argument
     of gettimeofday() is obsolete, so NULL is passed for it. */

  gettimeofday(&tv, NULL);

  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

  srandom(rand_seed);

  edit_params(argc, argv);

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

  }

  /* The marker variable is inherited by the child; finding it already set
     means the 'as' we executed was this wrapper again. */

  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */

  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {

    sanitizer = 1;
    inst_ratio /= 3;

  }

  if (!just_version) add_instrumentation();

  if (!(pid = fork())) {

    /* Child: become the real assembler. execvp() only returns on failure. */

    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  /* In --version mode no temporary file was created and modified_file is
     just the "--version" argument itself, so there is nothing of ours to
     remove - and unlinking it could delete an unrelated file of that name
     in the current directory. */

  if (!just_version && !getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);

  /* WEXITSTATUS() is only meaningful when the child exited normally. If the
     assembler was killed by a signal, report failure instead of whatever the
     exit-status bits happen to contain (typically 0, i.e. "success"). */

  if (!WIFEXITED(status)) exit(1);

  exit(WEXITSTATUS(status));

}