模糊测试开发实战：构建Bochs、MMU与文件I/O系统

背景

这是系列博客的第三篇，详细记录了开发基于Bochs的快照模糊测试器的过程。相关代码可在Lucid代码库中获取。

系统调用基础设施更新

根据Fuzzing Discord专家WorksButNotTested的建议，我们简化了系统调用处理机制。不再使用复杂的上下文切换和寄存器转换例程，而是直接让Bochs从C代码调用Rust函数处理系统调用。

改进前代码

1
2
3
4
5
6
7
8
9


/*
 * Earlier version of the 6-argument syscall wrapper.
 *
 * When no Lucid context is installed (g_lucid_ctx is NULL) the call is
 * delegated to __syscall6_original. Otherwise the context's exit_handler
 * address is bound to a register variable pinned to r12 for use by the
 * inline assembly, which is elided in this listing.
 */
static __inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
{
    if (!g_lucid_ctx) { return __syscall6_original(n, a1, a2, a3, a4, a5, a6); }
    
    register long ret;
    register long r12 __asm__("r12") = (size_t)(g_lucid_ctx->exit_handler);
    // ... complex inline assembly (elided)
    return ret;
}

改进后代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


/*
 * 6-argument syscall wrapper.
 *
 * When the g_lucid_syscall hook is set, the call is forwarded to it together
 * with g_lucid_ctx, the syscall number and all six arguments, and the hook's
 * return value is returned unchanged. Otherwise the wrapper executes the
 * `syscall` instruction itself.
 */
static __inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
{
    if (g_lucid_syscall)
        return g_lucid_syscall(g_lucid_ctx, n, a1, a2, a3, a4, a5, a6);
    
    unsigned long ret;
    /* a4, a5 and a6 are pinned to r10, r8 and r9 through register variables. */
    register long r10 __asm__("r10") = a4;
    register long r8 __asm__("r8") = a5;
    register long r9 __asm__("r9") = a6;
    /*
     * Constraints: n -> rax ("a"), a1 -> rdi ("D"), a2 -> rsi ("S"),
     * a3 -> rdx ("d"); the result is read back from rax ("=a").
     * rcx, r11 and memory are declared as clobbered.
     */
    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
                          "d"(a3), "r"(r10), "r"(r8), "r"(r9) : "rcx", "r11", "memory");
    return ret;
}

Rust端对应的函数签名：

1
2
3


/// Rust-side syscall handler, exported with the C ABI.
///
/// Takes a raw pointer to the `LucidContext`, the syscall number `n` and the
/// six syscall arguments `a1`..`a6`, and returns the syscall result as `u64`.
/// (Signature only; the body is not shown in this listing.)
pub extern "C" fn lucid_syscall(contextp: *mut LucidContext, n: usize,
    a1: usize, a2: usize, a3: usize, a4: usize, a5: usize, a6: usize)
    -> u64

调用约定变更

在重构系统调用处理时，我们简化了上下文切换的调用约定。现在只需传递Lucid执行上下文的指针（下面的汇编以r15作为该指针的基址），context_switch函数会读取上下文中的mode字段来决定执行哪条路径。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15


// `context_switch` is implemented in assembly below and declared here so Rust
// code can reference it. It takes no Rust-level arguments; the assembly uses
// r15 as its base pointer.
// NOTE(review): r15 is presumably the Lucid execution context pointer set up
// by the caller — confirm against the call sites.
extern "C" { fn context_switch(); }
global_asm!(
    ".global context_switch",
    "context_switch:",
    // Push RFLAGS, r14 and r13 onto the stack.
    "pushfq",
    "push r14",
    "push r13",
    // Determine the execution mode
    "mov r14, r15",
    "add r14, 0x8",     // mode lives at offset 0x8 from the base
    "mov r14, [r14]",
    // A mode value of 0 (low 32 bits) branches to `save_bochs`.
    "cmp r14d, 0x0",
    "je save_bochs",
    // ... rest of the routine (elided)
);

引入故障处理

我们引入了Fault错误类型，用于表示上下文切换或系统调用处理过程中遇到的错误。这些故障会被传回Lucid，由其统一处理。

1
2
3
4
5
6
7
8


/// Handles `fault` for the given context, then calls
/// `restore_lucid_execution(contextp)`.
///
/// For `Fault::Success` the context's `fault` field is set to
/// `Fault::Success`; the arms for the remaining variants are elided in this
/// listing.
pub fn fault_handler(contextp: *mut LucidContext, fault: Fault) {
    // SAFETY (assumed, not checked here): `contextp` must be non-null and
    // point to a live `LucidContext` with no other active references —
    // TODO confirm at the call sites.
    let context = unsafe { &mut *contextp };
    match fault {
        Fault::Success => context.fault = Fault::Success,
        // ... handling for the other fault variants (elided)
    }
    restore_lucid_execution(contextp);
}

沙箱化线程本地存储

通过修改Musl代码，我们把直接执行系统调用指令的地方改为调用系统调用包装函数，从而能够拦截并处理与TLS相关的操作。

1
2
3
4
5
6
7
8


/*
 * Initializes the thread pointer for the thread descriptor `p`.
 *
 * The descriptor's `self` field is pointed at itself, then the FS base is set
 * via arch_prctl(ARCH_SET_FS) through the generic syscall() wrapper rather
 * than through __set_thread_area. The remainder of the function is elided in
 * this listing.
 */
int __init_tp(void *p)
{
    pthread_t td = p;
    td->self = td;
    int r = syscall(SYS_arch_prctl, ARCH_SET_FS, TP_ADJ(p));
    // replaces the original: int r = __set_thread_area(TP_ADJ(p));
    // ... rest of the function (elided)
}

构建Bochs

使用musl-cross-make项目构建完整的工具链，包括支持自定义Musl的C++标准库。Bochs配置脚本如下：

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


#!/bin/sh

# Configure Bochs to be built with the musl cross toolchain as a static PIE.
# Replace the /path/to/ prefixes with the location of your toolchain.
export CC="/path/to/x86_64-linux-musl-gcc"
export CXX="/path/to/x86_64-linux-musl-g++"
export CFLAGS="-Wall --static-pie -fPIE"
# The C++ build uses exactly the same flags as the C build.
export CXXFLAGS="$CFLAGS"

./configure \
    --enable-sb16 \
    --enable-all-optimizations \
    --enable-long-phy-address \
    --enable-a20-pin \
    --enable-cpu-level=6 \
    --enable-x86-64 \
    --enable-vmx=2 \
    --enable-pci \
    --enable-usb \
    --enable-usb-ohci \
    --enable-usb-ehci \
    --enable-usb-xhci \
    --enable-busmouse \
    --enable-e1000 \
    --enable-show-ips \
    --enable-avx \
    --with-nogui

实现简易MMU

我们实现了内存管理单元来处理brk、mmap和munmap系统调用。MMU预分配两个内存池：一个用于brk调用，一个用于mmap调用。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


/// Bookkeeping for the two memory pools that back the emulated `brk` and
/// `mmap` system calls.
///
/// All fields are plain addresses or sizes, so the struct is cheap to clone
/// and can be printed with `{:?}` for diagnostics.
#[derive(Clone, Debug)]
pub struct Mmu {
    /// Base address of the brk region.
    pub brk_base: usize,
    /// Size of the program-break region.
    pub brk_size: usize,
    /// Current program break.
    pub curr_brk: usize,

    /// Base address of the mmap pool.
    pub mmap_base: usize,
    /// Size of the mmap pool.
    pub mmap_size: usize,
    /// Base address of the current mmap page.
    pub curr_mmap: usize,
    /// Base address of the next allocation.
    pub next_mmap: usize,
}

brk处理

1
2
3
4
5
6
7


/// Handles a `brk` request against the brk pool.
///
/// * `addr == 0` is treated as a query: nothing changes and `Ok(())` is
///   returned.
/// * Otherwise `addr` must lie in `curr_brk..=brk_base + brk_size`. The upper
///   bound is inclusive because the break is the first address *past* the
///   usable region, so moving it to the very end of the pool is legal.
///   Requests below the current break (shrinking) are still rejected.
///
/// # Errors
///
/// Returns `Err(())` and leaves `curr_brk` untouched when `addr` is outside
/// the accepted range.
pub fn update_brk(&mut self, addr: usize) -> Result<(), ()> {
    if addr == 0 {
        return Ok(());
    }

    // One past the last byte of the brk pool.
    let limit = self.brk_base + self.brk_size;
    if !(self.curr_brk..=limit).contains(&addr) {
        return Err(());
    }

    self.curr_brk = addr;
    Ok(())
}

mmap处理

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21


/// Handles an `mmap` request by carving the next chunk out of the mmap pool.
///
/// `len` is rounded up to a multiple of `PAGE_SIZE`. On success `curr_mmap`
/// is set to the base of the new chunk and `next_mmap` is advanced past it.
/// `flags`, `fd` and `offset` are not used by the checks shown here (further
/// checks are elided in this listing).
///
/// # Errors
///
/// Returns `Err(())` when
/// * rounding `len` up, or adding it to `next_mmap`, would overflow `usize`;
/// * the chunk would extend past `mmap_base + mmap_size`;
/// * `prot` is anything other than `PROT_READ | PROT_WRITE`.
pub fn do_mmap(
    &mut self,
    len: usize,
    prot: usize,
    flags: usize,
    fd: usize,
    offset: usize
) -> Result<(), ()> {
    // Page-align the length. The addition is checked so that a huge `len`
    // cannot wrap around to a small value and slip past the pool check.
    let len = len.checked_add(PAGE_SIZE - 1).ok_or(())? & !(PAGE_SIZE - 1);

    // End address of the would-be chunk, again guarded against wrap-around.
    let chunk_end = self.next_mmap.checked_add(len).ok_or(())?;
    if chunk_end > self.mmap_base + self.mmap_size {
        return Err(());
    }

    // Argument checks. Compare at full width so that stray high bits in
    // `prot` are not silently truncated away before the comparison.
    if prot != (libc::PROT_READ | libc::PROT_WRITE) as usize {
        return Err(());
    }
    // ... other checks (elided)

    self.curr_mmap = self.next_mmap;
    self.next_mmap = chunk_end;
    Ok(())
}

文件I/O实现

我们预先读取所需文件的内容并保存在内存中，以此模拟文件I/O系统调用。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12


/// Table of the in-memory files used to emulate file I/O.
#[derive(Clone, Debug)]
pub struct FileTable {
    files: Vec<File>,
}

/// A single in-memory file together with its read position.
#[derive(Clone, Debug)]
pub struct File {
    /// File descriptor.
    pub fd: i32,
    /// File path.
    pub path: String,
    /// File contents.
    pub contents: Vec<u8>,
    /// Current file cursor.
    pub cursor: usize,
}

读取文件处理

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20


// Handler for the read syscall (number 0x0).
// a1 = file descriptor, a2 = destination buffer, a3 = requested byte count.
0x0 => {
    // An unknown descriptor is reported as a fault.
    let Some(file) = context.files.get_file(a1 as i32) else {
        fault!(contextp, Fault::NoFile);
    };

    // A NULL destination buffer fails with EINVAL.
    let buf_p = a2 as *mut u8;
    if buf_p.is_null() {
        context.tls.errno = libc::EINVAL;
        return -1_i64 as u64;
    }

    // Clamp the request to what is left in the file. `saturating_sub` makes a
    // cursor that sits past the end of the contents read 0 bytes instead of
    // underflowing into an enormous length.
    let remaining = file.contents.len().saturating_sub(file.cursor);
    let length = std::cmp::min(a3, remaining);

    if length > 0 {
        // SAFETY: `length > 0` implies `cursor < contents.len()` and
        // `cursor + length <= contents.len()`, so the source range is inside
        // `contents`. The destination is assumed to be a writable buffer of at
        // least `a3` bytes supplied by the caller — not verified here.
        unsafe {
            std::ptr::copy(
                file.contents.as_ptr().add(file.cursor),
                buf_p,
                length);
        }
    }

    file.cursor += length;
    length as u64
},

结论

模糊测试器的开发工作仍在继续，下一步将选择模糊测试目标并使其在Bochs中运行。我们需要对Bochs模拟的系统进行修改，使其能够运行目标程序以便进行快照和模糊测试。