2024-02-24

qemuafl_analysis

总结一下关于AFL++ qemuafl fuzzing模式的实现方法，以供后面查阅

网上关于AFL分析的文章多之又多，但是关于AFL++的qemu mode分析没有找到。虽说AFL也有qemu mode，但是AFL++和AFL的qemu mode实现方式完全不同。本文想详细分析一下AFL++的qemu mode fuzzing。

入口

想要使用AFL++ qemu mode，需要在命令行中指定-Q参数。类似下面一行

afl-fuzz -Q -i input/ -o output -- $(target) @@

对应了afl-fuzz.c中下面部分

// 设置 afl->fsrv.qemu_mode = 1 作为标志
      case 'Q':                                                /* QEMU mode */
        if (afl->fsrv.qemu_mode) { FATAL("Multiple -Q options not supported"); }
        afl->fsrv.qemu_mode = 1;
        if (!mem_limit_given) { afl->fsrv.mem_limit = MEM_LIMIT_QEMU; }
        break;

关注qemu_mode变量走向，下面部分获取命令行中的其余参数。其中get_qemu_argv在afl-common.c中定义。该函数返回的use_argv参数为使用afl-qemu-trace替换之后的目标程序参数。

if (afl->fsrv.qemu_mode) {
    // afl还能针对wine模式进行fuzzing! 
    if (afl->use_wine) {
		...
    } else {
     // 读取输入的命令行参数
      use_argv = get_qemu_argv(argv[0], &afl->fsrv.target_path, argc - optind,
                               argv + optind);
    }
  }

/*下面是get_qemu_argv()的截取
# 和上面重写参数类似，不过这里返回的是new_argv，是greenhouse改掉了这一部分
  char *qemu_path = find_afl_binary(own_loc, "afl-qemu-trace");
  new_argv[0] = qemu_path;
  new_argv[1] = "-hackbind";
  new_argv[2] = "-hackproc";
  new_argv[3] = "-hookhack";
  new_argv[4] = "-execve";
  new_argv[5] = execve_arg;
  new_argv[6] = "--";
  new_argv[7] = *target_path_p;
  memcpy(&new_argv[8], &argv[1], (int)(sizeof(char *)) * (argc - 1))
  */

下面是一种use_argv的输出结果。下面的afl-qemu-trace即为qemuafl编译成的修改版qemu。这样修改是因为afl实际上的fuzz对象是afl-qemu-trace，需要改写命令行，让原本的目标二进制作为qemu的参数。

# 输入
afl-fuzz -Q -i ./input -o ./output -- ./httpd -w /www @@
# 转化得到的new_argv
afl-qemu-trace -- ./httpd -w /www @@

在这之后，设置了一系列afl的属性

if (afl->non_instrumented_mode || afl->fsrv.qemu_mode ||
      afl->fsrv.frida_mode || afl->fsrv.cs_mode || afl->unicorn_mode) {

   # MAP_SIZE是共享内存大小
	map_size = afl->fsrv.real_map_size = afl->fsrv.map_size = MAP_SIZE;
	# virgin_bits在afl-fuzz.h中定义，表示在map中还没有被fuzz触碰到的bit
    afl->virgin_bits = ck_realloc(afl->virgin_bits, map_size);
	
	# virgin_tmouts在afl-fuzz.h中定义，表示还没有在time out中看到的bits
    afl->virgin_tmout = ck_realloc(afl->virgin_tmout, map_size);

	# virgin_crash在afl-fuzz.h中定义，表示还没有在crash中看到的bits
    afl->virgin_crash = ck_realloc(afl->virgin_crash, map_size);

	# var_bytes: 指看起来似乎是变量的byte
    afl->var_bytes = ck_realloc(afl->var_bytes, map_size);
	...
  }

接下来是创建共享内存。afl需要在共享内存中监视目标进程fuzzing的覆盖率。

// use_argv即之前get_qemu_argv返回值
afl->argv = use_argv;
// trace_bits： SHM with instrumentation bitmap。定义在include/forkserver.h
afl->fsrv.trace_bits =
      afl_shm_init(&afl->shm, afl->fsrv.map_size, afl->non_instrumented_mode);

这里调用afl_shm_init从而创建共享内存，这个函数在src/afl-sharedmem.c中定义。这是一个很长的函数。为了了解共享内存的实现细节，需要了解这部分如何生成。

共享内存

负责共享内存初始化的函数是afl_shm_init。在这个函数中，当使用USEMMAP宏的时候，将会不使用共享内存而是使用映射一段文件的方式来作为共享内存。在默认情况下(Linux系统中)，这部分内容不会开启。所以默认情况下，是使用shmat和shmget的组合来申请共享内存。

这两种方法有什么差异呢？

从性能上而言：shmget/shmat申请的是System V共享内存，直接位于物理内存中；mmap映射的是一个文件，如果该文件位于磁盘上则会涉及磁盘读写。但是shm_open创建的对象在Linux上位于/dev/shm(tmpfs)中，同样驻留在内存里，因此只要机器上存在/dev/shm，两者性能差别不大。参考
从管理上而言，使用mmap则相当于产生一个文件，需要注意别的进程是否可能修改这个文件，以及释放空间等操作。shmat不需要关注此类问题。

以下是使用mmap申请内存的部分。申请的默认大小MAP_SIZE为65536，也就是64KB

u8 *afl_shm_init(sharedmem_t *shm, size_t map_size,
                 unsigned char non_instrumented_mode) {

  shm->map_size = 0;
  shm->map = NULL;
  shm->cmp_map = NULL;

// ###### 注意，默认情况下不使用这个宏  ######
#ifdef USEMMAP

  shm->g_shm_fd = -1;
  shm->cmplog_g_shm_fd = -1;
  const int shmflags = O_RDWR | O_EXCL;

  /* ======
  generate random file name for multi instance
  thanks to f*cking glibc we can not use tmpnam securely, it generates a
  security warning that cannot be suppressed
  so we do this worse workaround */
  // 生成路径文件("一切皆文件！")
  snprintf(shm->g_shm_file_path, L_tmpnam, "/afl_%d_%ld", getpid(), random());

  #ifdef SHM_LARGEPAGE_ALLOC_DEFAULT
  // 下面是一种针对大页(large page)的优化：使用更大的页面可以减少TLB未命中，从而加快内存读写速度
  // https://man.freebsd.org/cgi/man.cgi?query=shm_open&sektion=2&n=1
  static size_t sizes[4] = {(size_t)-1};
  static int    psizes = 0;
  int           i;
  if (sizes[0] == (size_t)-1) { psizes = getpagesizes(sizes, 4); }

  /* very unlikely to fail even if the arch supports only two sizes */
  if (likely(psizes > 0)) {
	...
  }
  #endif


  // ######### 默认创建共享内存方式 #########
  // 使用shm_open并通过ftruncate控制共享内存大小
  if (shm->g_shm_fd == -1) {
    // shm->g_shm_fd指向共享内存
      shm->g_shm_fd =
        shm_open(shm->g_shm_file_path, shmflags | O_CREAT, DEFAULT_PERMISSION);
  }

  if (shm->g_shm_fd == -1) { PFATAL("shm_open() failed"); }
  if (ftruncate(shm->g_shm_fd, map_size)) {
    PFATAL("setup_shm(): ftruncate() failed");
  }


 // ######### 将上述内存映射到afl-fuzz地址空间中 #########
  // 将shm->g_shm_fd对应的共享内存映射到地址空间，得到的shm->map就是进程中的共享内存
    shm->map =
      mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm->g_shm_fd, 0);
  if (shm->map == MAP_FAILED) {

    close(shm->g_shm_fd);
    shm->g_shm_fd = -1;
    shm_unlink(shm->g_shm_file_path);
    shm->g_shm_file_path[0] = 0;
    PFATAL("mmap() failed");

  }

  /* If somebody is asking us to fuzz instrumented binaries in non-instrumented
     mode, we don't want them to detect instrumentation, since we won't be
     sending fork server commands. This should be replaced with better
     auto-detection later on, perhaps? */

  // 设置环境变量SHM_ENV_VAR，方便afl-qemu-trace读取共享内存
  if (!non_instrumented_mode) setenv(SHM_ENV_VAR, shm->g_shm_file_path, 1);

  if (shm->map == (void *)-1 || !shm->map) PFATAL("mmap() failed");

  if (shm->cmplog_mode) {
    ### cmplog部分不关注
  }
  ... 
  return shm->map

以下是使用shmat申请内存的部分。方法是一致的

#else // NOT USEMMAP
  u8 *shm_str;

  // for qemu+unicorn we have to increase by 8 to account for potential
  // compcov map overwrite
  // 使用shmget映射内存
  shm->shm_id =
      shmget(IPC_PRIVATE, map_size == MAP_SIZE ? map_size + 8 : map_size,
             IPC_CREAT | IPC_EXCL | DEFAULT_PERMISSION);
  if (shm->shm_id < 0) {

    PFATAL("shmget() failed, try running afl-system-config");

  }

// cmplog mode是afl参数中加上-c得到的，一般不加
  if (shm->cmplog_mode) {
    ...
  }

// non_instrumented_mode是afl参数中加上-n得到的，一般不加，因此non_instrumented_mode=0
  if (!non_instrumented_mode) {
      ...
  }

// 下面的IPC_RMID用于将该段共享内存标记为待销毁(最后一个进程detach之后才真正释放)，这里在shmat失败时调用，避免共享内存泄漏
  shm->map = shmat(shm->shm_id, NULL, 0);
  if (shm->map == (void *)-1 || !shm->map) {
    shmctl(shm->shm_id, IPC_RMID, NULL);  // do not leak shmem
    if (shm->cmplog_mode) {
      shmctl(shm->cmplog_shm_id, IPC_RMID, NULL);  // do not leak shmem
    }
    PFATAL("shmat() failed");
  }

// 对cmplog mode做同样的处理，避免内存泄漏
  if (shm->cmplog_mode) {
    shm->cmp_map = shmat(shm->cmplog_shm_id, NULL, 0);
    if (shm->cmp_map == (void *)-1 || !shm->cmp_map) {
      shmctl(shm->shm_id, IPC_RMID, NULL);  // do not leak shmem
      shmctl(shm->cmplog_shm_id, IPC_RMID, NULL);  // do not leak shmem
      PFATAL("shmat() failed");
    }
  }

#endif

在申请共享内存的过程中，总共做的是：

设置了shm->map作为本地的共享内存指针
设置了环境变量SHM_ENV_VAR.

forkserver

设置完共享内存之后，就可以将此地址交给forkserver，并由其分配给目标待测程序。这部分的核心在于afl_fsrv_start函数，其位于src/afl-forkserver.c。

这里有两个名称：afl-forkserver和afl-fauxserver。前者是一般情况下使用的，后者afl-fauxserver在非插桩状态下使用，也就是dumb模式，直接运行二进制文件，完全的黑盒测试。

afl->fsrv.use_fauxsrv = afl->non_instrumented_mode == 1 || afl->no_forkserver;

forkserver是从afl-fuzz中fork出来的一个进程。这个进程单独用来产生待测程序。为什么要这么设计？因为一般情况下，直接执行二进制文件需要经过加载、链接等过程，较为耗时。而直接使用fork()方式产生新的进程，结合linux中COW的特点，速度将会显著提升。下面为其中的部分代码，位于src/afl-forkserver.c。需要注意在末尾，fork server实际上将被替换成目标待测进程。后面来详细说明为什么。

fsrv->fsrv_pid = fork();
  if (fsrv->fsrv_pid < 0) { PFATAL("fork() failed"); }
  if (!fsrv->fsrv_pid) {
      // 子进程作为fork server
      ...
    /* Set up control and status pipes, close the unneeded original fds. */
    // 设置控制管道和状态管道。这部分用来作为fork server和afl之间的消息传递通道
    if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) { PFATAL("dup2() failed"); } // control
    if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) { PFATAL("dup2() failed"); } // status

    // 关闭不需要的管道
      
    close(ctl_pipe[0]);
    close(ctl_pipe[1]);
    close(st_pipe[0]);
    close(st_pipe[1]);

    close(fsrv->out_dir_fd);
    close(fsrv->dev_null_fd);
    close(fsrv->dev_urandom_fd);
      
   // 设置一系列环境变量
   // 例如MSAN的配置信息等，省略不写了
      
   // 产生子进程。注意是调用了一个函数指针，
   fsrv->init_child_func(fsrv, argv);
      
// 默认情况下，init_child_func指向以下函数。这里截取关键部分
static void fsrv_exec_child(afl_forkserver_t *fsrv, char **argv) {
  if (fsrv->qemu_mode || fsrv->cs_mode) {
    setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0);
  }
  // forkserver直接被替换成fsrv->target_path，也就是afl-qemu-trace
  execv(fsrv->target_path, argv);

qemuafl

使用execve执行目标进程之后，终于到了本文想要分析的核心位置，qemuafl具体的实现方式。

在qemu的translator_loop中，有以下代码(accel/tcg/translator.c)

if (db->pc_next == afl_entry_point) {
            afl_setup(); // 初始化AFL
            gen_helper_afl_entry_routine(cpu_env); // 设置forkserver相关内容
        }

而这里afl_entry_point是qemu内置的加载器在加载目标binary时设置的，对应目标binary的入口位置(即_start处)。也就是目标程序一开始被翻译执行时，就会执行afl_setup()

afl_setup

afl_setup函数。包含了一系列初始化过程。

读取并设置共享内存
读取一系列环境变量，包括AFL_INST_LIBS、AFL_CODE_START、AFL_CODE_END、AFL_QEMU_INST_RANGES，AFL_QEMU_EXCLUDE_RANGES，AFL_DEBUG，AFL_QEMU_COMPCOV，AFL_COMPCOV_LEVEL，AFL_QEMU_SNAPSHOT
在这之后，是gen_helper_afl_entry_routine，它是由HELPER(afl_entry_routine)产生的(tcg-runtime.c)。它会调用afl_forkserver函数，定义在tcg-runtime.c中

1
2
3

/* TCG helper: converts env to its CPU state with env_cpu() and passes it to
   afl_forkserver(). */
void HELPER(afl_entry_routine)(CPUArchState *env) {
  afl_forkserver(env_cpu(env));
}

afl_forkserver

之前提到，afl中的forkserver模块虽然从afl-fuzz中被独立出来了，但是随后调用execve执行了qemu。那么forkserver在哪里呢？答案就是在qemuafl中，新增了这部分的代码。

/* Fork server loop that runs inside the emulator process.

   Writes a 4-byte status word to FORKSRV_FD + 1, then loops forever: reads a
   4-byte command from FORKSRV_FD, forks a child (or resumes a stopped one
   with SIGCONT), writes the child pid to FORKSRV_FD + 1, calls
   afl_wait_tsl(), waits for the child and writes its wait status to
   FORKSRV_FD + 1.

   Returns only in the freshly forked child, or when the initial status write
   fails; every other failure path leaves via exit(). */
void afl_forkserver(CPUState *cpu) {

  // forkserver

  pid_t child_pid;
  int   t_fd[2];
  u8    child_stopped = 0;
  u32   was_killed;
  int   status = 0;

  // Build the option word that is advertised to the parent.
  // NOTE(review): lkm_snapshot, sharedmem_fuzzing and tmp are declared
  // outside this excerpt - presumably initialised in afl_setup(); confirm.
  if (MAP_SIZE <= FS_OPT_MAX_MAPSIZE)
    status |= (FS_OPT_SET_MAPSIZE(MAP_SIZE) | FS_OPT_MAPSIZE);
  if (lkm_snapshot) status |= FS_OPT_SNAPSHOT;
  if (sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
  if (status) status |= (FS_OPT_ENABLED | FS_OPT_NEWCMPLOG);
  if (getenv("AFL_DEBUG"))
    fprintf(stderr, "Debug: Sending status %08x\n", status);
  memcpy(tmp, &status, 4);

  /* Tell the parent that we're alive. If the parent doesn't want
     to talk, assume that we're not running in forkserver mode. */
  // File descriptors are inherited across fork() and exec, so the status
  // pipe that afl-fuzz dup2()'d onto FORKSRV_FD + 1 is still open here.
  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
  afl_forksrv_pid = getpid();
  int first_run = 1;

  /* All right, let's await orders... */

  while (1) {

    /* Whoops, parent dead? */
    // Block until afl-fuzz requests the next run; the 4-byte value it sends
    // is stored in was_killed.
    if (read(FORKSRV_FD, &was_killed, 4) != 4) exit(2);

    /* If we stopped the child in persistent mode, but there was a race
       condition and afl-fuzz already issued SIGKILL, write off the old
       process. */

    if (child_stopped && was_killed) {

      child_stopped = 0;
      if (waitpid(child_pid, &status, 0) < 0) exit(8);

    }

    if (!child_stopped) {

      /* Establish a channel with child to grab translation commands. We'll
       read from t_fd[0], child will write to TSL_FD. */
      // Create the t_fd pipe used to communicate with the child.
      if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3);
      close(t_fd[1]);

      // This is where the qemuafl process itself acts as the fork server:
      // every pass through the while (1) loop forks a new child.
      child_pid = fork();
      if (child_pid < 0) exit(4);

      if (!child_pid) {

        /* Child process. Close descriptors and run free. */
        // Child: close the descriptors it does not need, then return so that
        // execution of the target continues.
        afl_fork_child = 1;
        close(FORKSRV_FD);
        close(FORKSRV_FD + 1);
        close(t_fd[0]);
        return;

      }

      /* Parent. */

      close(TSL_FD);

    } else {

      /* Special handling for persistent mode: if the child is alive but
         currently stopped, simply restart it with SIGCONT. */

      kill(child_pid, SIGCONT);
      child_stopped = 0;

    }

    /* Remaining parent-side logic. */
    
    // Report the child pid to afl-fuzz.
    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) exit(5);

    /* Collect translation requests until child dies and closes the pipe. */
    // NOTE(review): afl_wait_tsl() is not shown in this excerpt. Presumably
    // this is a QEMU-mode optimisation in which the fork server also keeps a
    // translation cache, so that later forked children do not have to
    // retranslate the same code - confirm against afl_wait_tsl().
    afl_wait_tsl(cpu, t_fd[0]);

    /* Get and relay exit status to parent. */

    if (waitpid(child_pid, &status, is_persistent ? WUNTRACED : 0) < 0) exit(6);

    /* In persistent mode, the child stops itself with SIGSTOP to indicate
       a successful run. In this case, we want to wake it up without forking
       again. */

    if (WIFSTOPPED(status))
      child_stopped = 1;
    else if (unlikely(first_run && is_persistent)) {

      fprintf(stderr, "[AFL] ERROR: no persistent iteration executed\n");
      exit(12);  // Persistent is wrong

    }

    first_run = 0;

    if (write(FORKSRV_FD + 1, &status, 4) != 4) exit(7);

  }

}

和上述代码交互的forkserver代码在src/afl-forkserver.c中

/* Execute target application, monitoring for timeouts. Return status
   information. The called program will update afl->fsrv->trace_bits.

   Returns FSRV_RUN_ERROR, FSRV_RUN_TMOUT, FSRV_RUN_CRASH or FSRV_RUN_OK.
   When an exchange with the fork server fails while *stop_soon_p is set,
   returns 0 instead of aborting. */

fsrv_run_result_t __attribute__((hot))
afl_fsrv_run_target(afl_forkserver_t *fsrv, u32 timeout,
                    volatile u8 *stop_soon_p) {

  s32 res;
  u32 exec_ms;
  // last_run_timed_out is reset to 0 further down and set to 1 when a run
  // times out, so this carries the outcome of the previous run.
  u32 write_value = fsrv->last_run_timed_out;

#ifdef __linux__
  if (fsrv->nyx_mode) {
// nyx_mode is out of scope here; its body is omitted from this excerpt.
    return FSRV_RUN_OK;
  }

#endif
  /* After this memset, fsrv->trace_bits[] are effectively volatile, so we
     must prevent any earlier operations from venturing into that
     territory. */

  // trace_bits is zeroed at the start of every afl_fsrv_run_target() call.
  memset(fsrv->trace_bits, 0, fsrv->map_size);
  MEM_BARRIER();

  /* we have the fork server (or faux server) up and running
  First, tell it if the previous run timed out. */
  // Send write_value (fsrv->last_run_timed_out) to the fork server. This is
  // the counterpart of read(FORKSRV_FD, &was_killed, 4) on the server side.

  if ((res = write(fsrv->fsrv_ctl_fd, &write_value, 4)) != 4) {
    if (*stop_soon_p) { return 0; }
    RPFATAL(res, "Unable to request new process from fork server (OOM?)");

  }

  fsrv->last_run_timed_out = 0;

  // Read the pid of the child that the fork server just created.
  // The fork server does not kill the child on timeout; this function does
  // that itself with kill() further down.
  if ((res = read(fsrv->fsrv_st_fd, &fsrv->child_pid, 4)) != 4) {

    if (*stop_soon_p) { return 0; }
    RPFATAL(res, "Unable to request new process from fork server (OOM?)");

  }

#ifdef AFL_PERSISTENT_RECORD
  // NOTE(review): the body of this branch is omitted from the excerpt.
  // Presumably AFL_PERSISTENT_RECORD covers persistent-mode crashes that only
  // reproduce after several consecutive inputs, by recording a number of
  // inputs preceding the crash - confirm against the full source.
  if (unlikely(fsrv->persistent_record))
  }

#endif  // AFL_PERSISTENT_RECORD

// child_pid <= 0 means the fork server did not report a usable child.
// NOTE(review): a failed fork is one cause, but the branch below also handles
// an FS_OPT_ERROR / FS_ERROR_SHM_OPEN report encoded in child_pid.
  if (fsrv->child_pid <= 0) {

    if (*stop_soon_p) { return 0; }

    if ((fsrv->child_pid & FS_OPT_ERROR) &&
        FS_OPT_GET_ERROR(fsrv->child_pid) == FS_ERROR_SHM_OPEN)
      FATAL(
          "Target reported shared memory access failed (perhaps increase "
          "shared memory available).");

    FATAL("Fork server is misbehaving (OOM?)");

  }

// Read the child's 4-byte status with a timeout.
// NOTE(review): the result is compared against timeout and treated as a
// failure when 0, so it looks like elapsed milliseconds - confirm against
// read_s32_timed().
  exec_ms = read_s32_timed(fsrv->fsrv_st_fd, &fsrv->child_status, timeout,
                           stop_soon_p);

  if (exec_ms > timeout) {

    // The child ran past the timeout.
    /* If there was no response from forkserver after timeout seconds,
    we kill the child. The forkserver should inform us afterwards */

    s32 tmp_pid = fsrv->child_pid;
    if (tmp_pid > 0) {

      kill(tmp_pid, fsrv->child_kill_signal);
      fsrv->child_pid = -1;

    }

    fsrv->last_run_timed_out = 1;
    if (read(fsrv->fsrv_st_fd, &fsrv->child_status, 4) < 4) { exec_ms = 0; }

  }

  if (!exec_ms) {

    if (*stop_soon_p) { return 0; }
    RPFATAL(res, "Unable to communicate with fork server");

  }

  // WIFSTOPPED() is true when the child is stopped rather than terminated;
  // in every other case the pid is no longer tracked.
  if (!WIFSTOPPED(fsrv->child_status)) { fsrv->child_pid = -1; }

  fsrv->total_execs++;

  /* Any subsequent operations on fsrv->trace_bits must not be moved by the
     compiler below this point. Past this location, fsrv->trace_bits[]
     behave very normally and do not have to be treated as volatile. */

  MEM_BARRIER();

  /* Report outcome to caller. */

  /* Was the run unsuccessful? */
  if (unlikely(*(u32 *)fsrv->trace_bits == EXEC_FAIL_SIG)) {

    return FSRV_RUN_ERROR;

  }

  /* Did we timeout? */
  if (unlikely(fsrv->last_run_timed_out)) {

    fsrv->last_kill_signal = fsrv->child_kill_signal;
    return FSRV_RUN_TMOUT;

  }

  /* Did we crash?
  In a normal case, (abort) WIFSIGNALED(child_status) will be set.
  MSAN in uses_asan mode uses a special exit code as it doesn't support
  abort_on_error. On top, a user may specify a custom AFL_CRASH_EXITCODE.
  Handle all three cases here. */
  // Decide whether the child crashed. WIFSIGNALED() and WEXITSTATUS() are
  // applied to the wait status stored in child_status, not to the pid.

  if (unlikely(
          /* A normal crash/abort */
          (WIFSIGNALED(fsrv->child_status)) ||
          /* special handling for msan and lsan */
          (fsrv->uses_asan &&
           (WEXITSTATUS(fsrv->child_status) == MSAN_ERROR ||
            WEXITSTATUS(fsrv->child_status) == LSAN_ERROR)) ||
          /* the custom crash_exitcode was returned by the target */
          (fsrv->uses_crash_exitcode &&
           WEXITSTATUS(fsrv->child_status) == fsrv->crash_exitcode))) {


    /* For a proper crash, set last_kill_signal to WTERMSIG, else set it to 0 */
    fsrv->last_kill_signal =
        WIFSIGNALED(fsrv->child_status) ? WTERMSIG(fsrv->child_status) : 0;
    return FSRV_RUN_CRASH;

  }

  /* success :) */
  return FSRV_RUN_OK;

}

写入共享内存

被fork出的程序在哪里被写入共享内存？以下是最核心的一段宏

/* INC_AFL_AREA(loc): increment the byte at afl_area_ptr[loc].

   On x86 / x86_64 with AFL_QEMU_NOT_ZERO defined, the increment is done with
   addb followed by adcb $0: when addb wraps the byte from 0xff to 0 it sets
   the carry flag, and adcb adds that carry back, so the byte ends at 1
   rather than 0. Everywhere else it is a plain ++ that wraps to 0. */
#if (defined(__x86_64__) || defined(__i386__)) && defined(AFL_QEMU_NOT_ZERO)
  #define INC_AFL_AREA(loc)           \
    asm volatile(                     \
        "addb $1, (%0, %1, 1)\n"      \
        "adcb $0, (%0, %1, 1)\n"      \
        : /* no out */                \
        : "r"(afl_area_ptr), "r"(loc) \
        : "memory", "eax")
#else
  #define INC_AFL_AREA(loc) afl_area_ptr[loc]++
#endif

这里引用了afl_area_ptr，并使用内联汇编的方式向其写入内容。事实上就是下面else部分的含义，即数组内存偏移位置++。这个宏在translate-all.c中被用到。

/* TCG helper: bumps the map entry at index cur_loc ^ afl_prev_loc through
   INC_AFL_AREA(), then stores cur_loc >> 1 in afl_prev_loc for the next
   call. */
void HELPER(afl_maybe_log)(target_ulong cur_loc) {

  register uintptr_t afl_idx = cur_loc ^ afl_prev_loc; // <---- how AFL derives the index into the shared map

  INC_AFL_AREA(afl_idx); // <---- the macro is used here to write to the shared map

  // afl_prev_loc = ((cur_loc & (MAP_SIZE - 1) >> 1)) |
  //                ((cur_loc & 1) << ((int)ceil(log2(MAP_SIZE)) -1));
  afl_prev_loc = cur_loc >> 1;

}

而gen_helper_afl_maybe_log位于translate-all.c中

/* Generates TCG code for AFL's tracing instrumentation.

   Does nothing when afl_must_instrument(cur_loc) is false, or when the hashed
   and masked location is >= afl_inst_rms. Otherwise it emits a call to the
   afl_maybe_log helper with that location as a constant argument. */
static void afl_gen_trace(target_ulong cur_loc) {

  /* Optimize for cur_loc > afl_end_code, which is the most likely case on
     Linux systems. */

  cur_block_is_good = afl_must_instrument(cur_loc); // NOTE(review): afl_must_instrument() is not shown; presumably it checks the address range given by AFL_CODE_START / AFL_CODE_END - confirm

  if (!cur_block_is_good)
    return;

  /* Looks like QEMU always maps to fixed locations, so ASLR is not a
     concern. Phew. But instruction addresses may be aligned. Let's mangle
     the value to get something quasi-uniform. */

  cur_loc = (uintptr_t)(afl_hash_ip((uint64_t)cur_loc));
  cur_loc &= (MAP_SIZE - 1);

  /* Implement probabilistic instrumentation by looking at scrambled block
     address. This keeps the instrumented locations stable across runs. */

  if (cur_loc >= afl_inst_rms) return;

  TCGv cur_loc_v = tcg_const_tl(cur_loc);
  gen_helper_afl_maybe_log(cur_loc_v); // <------ emits TCG code that calls the afl_maybe_log helper, which writes the trace map
  tcg_temp_free(cur_loc_v);

}

最后的最后，afl_gen_trace的调用位于tb_gen_code，也就是qemu本身翻译代码的部分。从而将qemu与AFL结合起来了。

TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
...
    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    afl_gen_trace(pc); // <---- 插入的生成AFL语句的代码位置
    gen_intermediate_code(cpu, tb, max_insns);
    tcg_ctx->cpu = NULL;
    max_insns = tb->icount;

    trace_translate_block(tb, tb->pc, tb->tc.ptr);

fuzz_one

设置共享内存之后的下一个核心内容就是fuzzing目标进程了。这部分的主要函数名称是fuzz_one。

fuzz_one定义在src/afl-fuzz-one.c中。fuzz_one的主要流程是

打开queue
读取一个queue文件。这里的queue就是afl工作文件夹queue/下面的一个输入样例文件
如果这个文件是新文件，调用calibrate_case
在多重变异方法下，调用common_fuzz_stuff来执行目标文件，调用save_if_interesting判断当前变异是否引入了新路径，如果是则保存当前文件到queue文件夹下
调用calculate_score计算当前输入评分

在这里我从上向下介绍流程。上面提到和fork-server交互的部分位于afl_fsrv_run_target。经过搜索不难发现，fuzz_run_target是afl_fsrv_run_target的一个包装函数。而调用fuzz_run_target的地方，在于afl-fuzz-run.c中的common_fuzz_stuff。因此需要着重关注common_fuzz_stuff。

common_fuzz_stuff

common_fuzz_stuff读取一个queue文件，向qemuafl发送消息，使qemuafl产生一次fork并返回结果到fault。这个fault是一个fsrv_run_result_t类型的变量，一共有以下几种结果。

/* Result codes for one target execution; this is the return type of
   afl_fsrv_run_target(). Values are consecutive, starting at 0 for
   FSRV_RUN_OK. */
typedef enum fsrv_run_result {
  /* 00 */ FSRV_RUN_OK = 0,
  /* 01 */ FSRV_RUN_TMOUT,
  /* 02 */ FSRV_RUN_CRASH,
  /* 03 */ FSRV_RUN_ERROR,
  /* 04 */ FSRV_RUN_NOINST,
  /* 05 */ FSRV_RUN_NOBITS,
} fsrv_run_result_t;

下面的common_fuzz_stuff中包含了fuzz_run_target返回的fault。并在save_if_interesting中处理。

/* Write out one mutated input, run the target on it once and pass the result
   to save_if_interesting().

   Returns 1 when afl->stop_soon is set, when subseq_tmouts has exceeded
   TMOUT_LIMIT on a timeout, or when a skip was requested; returns 0
   otherwise. */
u8 __attribute__((hot))
common_fuzz_stuff(afl_state_t *afl, u8 *out_buf, u32 len) {
  u8 fault;
  len = write_to_testcase(afl, (void **)&out_buf, len, 0); // NOTE(review): write_to_testcase() is not shown; presumably it writes out_buf to the current test case file in the output directory - confirm
  fault = fuzz_run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout); // <--- ask the fork server (qemuafl) for one run: it forks once and the outcome comes back in fault
  if (afl->stop_soon) { return 1; }
  if (fault == FSRV_RUN_TMOUT) {
    if (afl->subseq_tmouts++ > TMOUT_LIMIT) {
      ++afl->cur_skipped_items;
      return 1;
    }
  } else {
    afl->subseq_tmouts = 0;
  }

  /* Users can hit us with SIGUSR1 to request the current input
     to be abandoned. */
  if (afl->skip_requested) {
    afl->skip_requested = 0;
    ++afl->cur_skipped_items;
    return 1;
  }

  /* This handles FAULT_ERROR for us: */
  // Hand fault to save_if_interesting(); its return value is added to
  // queued_discovered.
  afl->queued_discovered += save_if_interesting(afl, out_buf, len, fault);
  if (!(afl->stage_cur % afl->stats_update_freq) ||
      afl->stage_cur + 1 == afl->stage_max) {
    show_stats(afl);
  }
  return 0;
}

save_if_interesting

这个函数在common_fuzz_stuff中被调用(common_fuzz_stuff在每次变异之后都会被执行，是最主要的执行目标进程的函数)。因此save_if_interesting也会被反复执行。这个函数检查本次执行的结果是否有趣(interesting)，如果是就将输入保存下来，并返回1。

在afl中有一个配置选项afl->crash_mode，代表了最常见情况下，执行目标进程的退出信号。默认情况下afl->crash_mode是0，也就是fsrv_run_result中FSRV_RUN_OK。

crash_mode在save_if_interesting被使用，他代表常见情况下，程序执行的结果是什么。例如在crash mode=0时，下面代码就代表了在fault为FSRV_RUN_OK(0)时，afl->crash_mode也为0时，也就是默认情况下找到interesting case的方法：调用has_new_bits_unclassified寻找是否产生了新的bits。如果是的话，就写入queue中。

调用has_new_bits_unclassified的地方在save_if_interesting。这是一个很长的函数，下面我只截取出重要部分。

u8 __attribute__((hot))
save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
    ...;
    if (likely(fault == afl->crash_mode)) {
        // 一般情况下，fault=0(执行不产生异常), afl->crash_mode也是0
        new_bits = has_new_bits_unclassified(afl, afl->virgin_bits);
        // 比较常见的情况是没有new_bits	
    	if (likely(!new_bits)) {
      		if (unlikely(afl->crash_mode)) { ++afl->total_crashes; }
            // 没有新的bits，直接返回
      		return 0;
    }
    ...;
    //能产生新的coverage的输入保存到文件
    queue_fn = alloc_printf("%s/queue/id_%06u", afl->out_dir, afl->queued_items);
    ...;
    // 对于一个新的样本，调用calibrate_case计算其分数
    res = calibrate_case(afl, afl->queue_top, mem, afl->queue_cycle - 1, 0);
    ...;
    switch (fault) {
            // 根据参数fault
            case FSRV_RUN_TMOUT:
            	...;
            	break;
            case FSRV_RUN_CRASH:
            	// yeah!
            	...;
            	break;
    }
    }

最后common_fuzz_stuff，在哪里被调用？

afl-fuzz-one.c

自顶向下寻找，上述函数可以追溯到afl-fuzz-one.c中，被多次调用。事实上，每一次调用common_fuzz_stuff时，就代表afl-fuzz对一个queue(一个输入样例)产生了一次变异，包括算术变换、拼接，铰接等等。每变异一次，就调用一次common_fuzz_stuff，命令fork server产生一次fork，等待目标进程结束，判断是否存在产生interesting的输入，或者产生crash。

calibrate case

函数上方的注释已经将函数的意义说得很清楚了

1
2
3

/* Calibrate a new test case. This is done when processing the input directory
   to warn about flaky or otherwise problematic test cases early on; and when
   new paths are discovered to detect variable behavior and so on. */

这个函数不会在很多地方调用。如果排除unlikely的调用位置，只有在afl-fuzz-init时处理初始输入以及获取了一个新的interesting case，将要放入queue中时才会调用。主要是判断input文件夹中的输入样例是否是正常的，以及在新的case产生时用于评估这个新的测试样例执行时间、覆盖率等信息。

这个函数比较复杂，并且和qemuafl关系不密切，因此可以参考这篇文章afl源码解析

总结

qemuafl充当了指令翻译cache和forkserver两个角色。如果想要修改共享map的结构，可以想象哪些地方需要修改。

calibrate_case中判断是否有new bytes以及virgin bytes等
计算一个输入样例评分时
common fuzz stuff判断一个fuzzing状态下的变异是否存在新bits时
…

参考文章

基于qemu和unicorn的Fuzz技术分析 - 先知社区 (aliyun.com)

包含了qemu的执行流程中所有调用的函数

本文标题:qemuafl_analysis

文章作者:

发布时间:2024-02-24, 20:06:47

最后更新:2024-02-25, 12:44:18

原始链接:https://nicholas-wei.github.io/2024/02/24/qemuafl-analysis/

许可协议: "署名-非商用-相同方式共享 4.0" 转载请保留原文链接及作者。