内核赛周记1

#os #coding #rust #proj

内核赛第一周，这份文档用于记录开发过程，以及写一些模块的思路 & 代码。

lwext4

参考 phoenix 的实现，用到了 lwext4 file system，所以打算把 lwext4 接进我们的内核。

块设备

lwext4 为了可以更加通用，把整个文件系统和块设备解耦开了，留了 KernelDevOp 的接口方法需要我们去实现。这几个方法，会被文件系统用于操作块设备（读取、写入、定位）。

首先我们需要为 virtio-driver 实现环形队列的创建与分配使用。这部分和 rcore 的实现会有一些出入（似乎 rcore 的 virtio-driver 的版本比较低）。但代码总体框架还是可以沿用的。具体实现参考了 Titanix 和 Phoenix 的驱动。

// in os/src/drivers/block/virtio_blk.rs

// Global list that takes ownership of every frame handed out by `dma_alloc`.
// NOTE(review): presumably this keeps each `FrameTracker` alive so the frame is
// not released while the device still uses it — confirm against `FrameTracker`'s Drop.
lazy_static! {
    static ref QUEUE_FRAMES: UPSafeCell<Vec<FrameTracker>> = unsafe { UPSafeCell::new(Vec::new()) };
}

/// Zero-sized type that carries the kernel's implementation of the
/// `virtio_drivers::Hal` callbacks (DMA allocation and address translation).
pub struct VirtioHal;

unsafe impl virtio_drivers::Hal for VirtioHal {
    /// Allocates `pages` physical frames and returns the physical address of the
    /// first one together with a pointer to it.
    ///
    /// # Panics
    /// Panics if a frame cannot be allocated, or if the allocated frames are not
    /// physically contiguous (see the `assert_eq!` below).
    fn dma_alloc(pages: usize, _direction: BufferDirection,) -> (virtio_drivers::PhysAddr, NonNull<u8>) {
        info!("dma_alloc");
        let mut ppn_base = PhysPageNum(0);
        for i in 0..pages {
            let frame = frame_alloc().unwrap();
            if i == 0 {
                ppn_base = frame.ppn;
            }
            // The region must be contiguous: the i-th frame has to sit exactly
            // i pages after the first one.
            assert_eq!(frame.ppn.0, ppn_base.0 + i);
            // Hand ownership of the frame to the global list.
            QUEUE_FRAMES.exclusive_access().push(frame);
        }
        let pa: PhysAddr = ppn_base.into();
        // NOTE(review): the physical address is used directly as the pointer,
        // which presumably relies on the kernel identity-mapping physical memory — confirm.
        (pa.0, NonNull::new(pa.get_mut::<u8>()).unwrap())
    }

    /// Releases `pages` frames starting at physical address `paddr`; always returns 0.
    ///
    /// NOTE(review): the frames are released with `frame_dealloc`, but the
    /// `FrameTracker`s pushed into `QUEUE_FRAMES` by `dma_alloc` are not removed
    /// here. If `FrameTracker` also frees its frame on drop this could release
    /// the same frame twice — confirm.
    unsafe fn dma_dealloc(paddr: virtio_drivers::PhysAddr, _vaddr: NonNull<u8>, pages: usize) -> i32 {
        info!("dma_dealloc");
        let pa = PhysAddr::from(paddr);
        let mut ppn_base: PhysPageNum = pa.into();
        for _ in 0..pages {
            frame_dealloc(ppn_base);
            ppn_base.step();
        }
        0
    }

    /// Converts an MMIO physical address into a pointer the kernel can dereference.
    /// The address value is reused unchanged; `_size` is ignored.
    unsafe fn mmio_phys_to_virt(paddr: virtio_drivers::PhysAddr, _size: usize) -> NonNull<u8> {
        NonNull::new(PhysAddr::from(paddr).get_mut::<u8>()).unwrap()
    }

    /// Returns the physical address of `buffer` so the device can access it.
    ///
    /// Only the address of the first byte is translated, and the lookup goes
    /// through the kernel page table, so the buffer must be mapped in kernel space.
    /// NOTE(review): a buffer spanning non-contiguous physical pages, or a
    /// user-space pointer, would not be handled by this translation — confirm callers.
    unsafe fn share(
        buffer: NonNull<[u8]>,
        _direction: BufferDirection,
    ) -> virtio_drivers::PhysAddr {
        // use kernel space pagetable to get the physical address
        let page_table = PageTable::from_token(KERNEL_SPACE.exclusive_access().token());
        let pa = page_table.translate_va(VirtAddr::from(buffer.as_ptr() as *const u8 as usize)).unwrap();
        
        pa.0
    }

    /// Nothing to undo: `share` only translates an address and creates no mapping or copy.
    unsafe fn unshare(
        _paddr: virtio_drivers::PhysAddr,
        _buffer: NonNull<[u8]>,
        _direction: BufferDirection,
    ) {
    }
}

注意这里虚实地址的转换，后期我们可能会使用用户态的指针来打开文件，需要尤其小心！但现在的 Chronix 还不支持这个操作。

为 virtio-driver 实现了环形队列之后，就可以用 virtio-blk 来实现我们的块设备（VirtIOBlock + BlockDevice 方法）了。

// in os/src/drivers/block/virtio_blk.rs
/// Physical address of the virtio MMIO register block used for the disk.
// NOTE(review): presumably the first virtio-mmio slot of the QEMU `virt` machine — confirm.
const VIRTIO0: usize = 0x10001000;

/// The virtio block driver wrapped in the kernel's interior-mutability cell.
pub struct VirtIOBlock(UPSafeCell<VirtIOBlk<VirtioHal, MmioTransport>>);

impl VirtIOBlock {
    /// Probes the device at [`VIRTIO0`] and builds the block driver on top of it.
    ///
    /// # Panics
    /// Panics if the MMIO transport or the block driver cannot be initialised.
    #[allow(unused)]
    pub fn new() -> Self {
        // Building the pointer is safe; only using it as a device header is not.
        let regs = core::ptr::NonNull::new(VIRTIO0 as *mut VirtIOHeader).unwrap();
        unsafe {
            let transport = MmioTransport::new(regs).unwrap();
            let driver = VirtIOBlk::<VirtioHal, MmioTransport>::new(transport)
                .expect("failed to create blk driver");
            Self(UPSafeCell::new(driver))
        }
    }
}

impl BlockDevice for VirtIOBlock {
    /// Total device size in bytes: the driver-reported capacity times `BLOCK_SIZE`.
    // NOTE(review): this is only correct if `capacity()` counts units of
    // `BLOCK_SIZE` bytes (virtio reports 512-byte sectors) — confirm BLOCK_SIZE == 512.
    fn size(&self) -> u64 {
        let units = self.0.exclusive_access().capacity();
        units * (BLOCK_SIZE as u64)
    }

    /// Size of one block in bytes.
    fn block_size(&self) -> usize {
        BLOCK_SIZE
    }

    /// Reads block `block_id` into `buf`; panics if the driver reports an error.
    fn read_block(&self, block_id: usize, buf: &mut [u8]) {
        let mut driver = self.0.exclusive_access();
        driver
            .read_blocks(block_id, buf)
            .expect("Error when reading VirtIOBlk");
    }

    /// Writes `buf` to block `block_id`; panics if the driver reports an error.
    fn write_block(&self, block_id: usize, buf: &[u8]) {
        let mut driver = self.0.exclusive_access();
        driver
            .write_blocks(block_id, buf)
            .expect("Error when writing VirtIOBlk");
    }
}

// in os/src/drivers/block/mod.rs
// The single, lazily created block device shared by the rest of the kernel,
// exposed behind the `BlockDevice` trait object.
lazy_static! {
    pub static ref BLOCK_DEVICE: Arc<dyn BlockDevice> = Arc::new(BlockDeviceImpl::new());
}

Disk

完成了块设备后，我们还需要为文件系统提供一个有 KernelDevOp 方法的对象用于操作。参考 Phoenix 、lwext4 下的 example os，这里会实现一个带有 KernelDevOp 的 Disk 对象。

/// A block device paired with a byte-granular cursor.
///
/// The cursor is stored as `(block_id, offset)` where `offset` is always
/// strictly less than `BLOCK_SIZE`.
pub struct Disk {
    /// Index of the block the cursor currently points into.
    block_id: usize,
    /// Byte offset of the cursor inside `block_id` (`0..BLOCK_SIZE`).
    offset: usize,
    /// The underlying block device.
    dev: Arc<dyn BlockDevice>,
}

impl Disk {
    /// Create a new disk whose cursor sits at byte 0.
    pub fn new(dev: Arc<dyn BlockDevice>) -> Self {
        Self {
            block_id: 0,
            offset: 0,
            dev,
        }
    }

    /// Get the size of the disk in bytes, as reported by the underlying device.
    pub fn size(&self) -> u64 {
        self.dev.size()
    }

    /// Get the position of the cursor as an absolute byte offset.
    pub fn position(&self) -> u64 {
        // Widen before multiplying so the product cannot overflow `usize`
        // on targets where it is narrower than 64 bits.
        self.block_id as u64 * BLOCK_SIZE as u64 + self.offset as u64
    }

    /// Set the position of the cursor to the absolute byte offset `pos`.
    pub fn set_position(&mut self, pos: u64) {
        // Divide in u64 first so `pos` is never truncated before the split.
        self.block_id = (pos / BLOCK_SIZE as u64) as usize;
        self.offset = (pos % BLOCK_SIZE as u64) as usize;
    }

    /// Move the cursor forward by `count` bytes; `count` must not cross more
    /// than one block boundary (callers pass at most `BLOCK_SIZE - offset`).
    fn advance(&mut self, count: usize) {
        self.offset += count;
        if self.offset >= BLOCK_SIZE {
            self.block_id += 1;
            self.offset -= BLOCK_SIZE;
        }
    }

    /// Read within one block, returns the number of bytes read.
    ///
    /// At most the bytes up to the end of the current block are read, so the
    /// caller loops until its buffer is full. An empty `buf` returns `Ok(0)`
    /// without touching the device.
    pub fn read_one(&mut self, buf: &mut [u8]) -> Result<usize, i32> {
        if buf.is_empty() {
            return Ok(0);
        }

        if self.offset == 0 && buf.len() >= BLOCK_SIZE {
            // Whole block: read straight into the caller's buffer.
            self.dev.read_block(self.block_id, &mut buf[..BLOCK_SIZE]);
            self.block_id += 1;
            return Ok(BLOCK_SIZE);
        }

        // Partial block: go through a bounce buffer and copy the wanted slice.
        let start = self.offset;
        let count = buf.len().min(BLOCK_SIZE - start);
        let mut data = [0u8; BLOCK_SIZE];
        self.dev.read_block(self.block_id, &mut data);
        buf[..count].copy_from_slice(&data[start..start + count]);
        self.advance(count);
        Ok(count)
    }

    /// Write within one block, returns the number of bytes written.
    ///
    /// At most the bytes up to the end of the current block are written. An
    /// empty `buf` returns `Ok(0)` without touching the device.
    pub fn write_one(&mut self, buf: &[u8]) -> Result<usize, i32> {
        if buf.is_empty() {
            return Ok(0);
        }

        if self.offset == 0 && buf.len() >= BLOCK_SIZE {
            // Whole block: write straight from the caller's buffer.
            self.dev.write_block(self.block_id, &buf[..BLOCK_SIZE]);
            self.block_id += 1;
            return Ok(BLOCK_SIZE);
        }

        // Partial block: read-modify-write so the untouched bytes are preserved.
        let start = self.offset;
        let count = buf.len().min(BLOCK_SIZE - start);
        let mut data = [0u8; BLOCK_SIZE];
        self.dev.read_block(self.block_id, &mut data);
        data[start..start + count].copy_from_slice(&buf[..count]);
        self.dev.write_block(self.block_id, &data);
        self.advance(count);
        Ok(count)
    }
}

impl KernelDevOp for Disk {
    //type DevType = Box<Disk>;
    type DevType = Disk;

    fn read(dev: &mut Self, mut buf: &mut [u8]) -> Result<usize, i32> {
        debug!("READ block device buf={}", buf.len());
        let mut read_len = 0;
        while !buf.is_empty() {
            match dev.read_one(buf) {
                Ok(0) => break,
                Ok(n) => {
                    let tmp = buf;
                    buf = &mut tmp[n..];
                    read_len += n;
                }
                Err(_e) => return Err(-1),
            }
        }
        debug!("READ rt len={}", read_len);
        Ok(read_len)
    }
    fn write(dev: &mut Self, mut buf: &[u8]) -> Result<usize, i32> {
        debug!("WRITE block device buf={}", buf.len());
        let mut write_len = 0;
        while !buf.is_empty() {
            match dev.write_one(buf) {
                Ok(0) => break,
                Ok(n) => {
                    buf = &buf[n..];
                    write_len += n;
                }
                Err(_e) => return Err(-1),
            }
        }
        debug!("WRITE rt len={}", write_len);
        Ok(write_len)
    }
    fn flush(_dev: &mut Self::DevType) -> Result<usize, i32> {
        Ok(0)
    }
    fn seek(dev: &mut Self, off: i64, whence: i32) -> Result<i64, i32> {
        let size = dev.size();
        debug!(
            "SEEK block device size:{}, pos:{}, offset={}, whence={}",
            size,
            &dev.position(),
            off,
            whence
        );
        let new_pos = match whence as u32 {
            lwext4_rust::bindings::SEEK_SET => Some(off),
            lwext4_rust::bindings::SEEK_CUR => {
                dev.position().checked_add_signed(off).map(|v| v as i64)
            }
            lwext4_rust::bindings::SEEK_END => size.checked_add_signed(off).map(|v| v as i64),
            _ => {
                error!("invalid seek() whence: {}", whence);
                Some(off)
            }
        }
        .ok_or(-1)?;

        if new_pos as u64 > size {
            warn!("Seek beyond the end of the block device");
        }
        dev.set_position(new_pos as u64);
        Ok(new_pos)
    }
}

至此完成了我们为文件系统提供底层的支持。

VFS

现在我们可以开始研究、使用这个文件系统为上层提供的接口了。lwext4 为上层提供了 Ext4File 对象用于操作文件/目录，提供 Ext4BlockWrapper 对象用于初始化块设备 & 创建文件系统。我的设计思路是：写一个 VFS 层，类似 rcore，向上提供一样接口，这样可以最大程度利用已有代码。

对 Ext4File 的包装：

/// The inode of the Ext4 filesystem
pub struct Inode(RefCell<Ext4File>);

接着为其实现和 rcore 的 EFS 类似的方法，但用到 Ext4File 提供的操作。部分参考了 ArceOS。

// SAFETY: NOTE(review): `Inode` wraps a `RefCell`, which does no cross-thread
// synchronisation. These impls are presumably sound only while inodes are
// accessed from one hart at a time — confirm before enabling multi-core.
unsafe impl Send for Inode {}
unsafe impl Sync for Inode {}
impl Inode {
    /// Create a new inode
    pub fn new(path: &str, types: InodeTypes) -> Self {
        info!("Inode new {:?} {}", types, path);
        //file.file_read_test("/test/test.txt", &mut buf);

        Self(RefCell::new(Ext4File::new(path, types)))
    }

    fn path_deal_with(&self, path: &str) -> String {
        if path.starts_with('/') {
            warn!("path_deal_with: {}", path);
        }
        let p = path.trim_matches('/'); // 首尾去除
        if p.is_empty() || p == "." {
            return String::new();
        }

        if let Some(rest) = p.strip_prefix("./") {
            //if starts with "./"
            return self.path_deal_with(rest);
        }
        let rest_p = p.replace("//", "/");
        if p != rest_p {
            return self.path_deal_with(&rest_p);
        }

        //Todo ? ../
        //注：lwext4创建文件必须提供文件path的绝对路径
        let file = self.0.borrow_mut();
        let path = file.get_path();
        let fpath = String::from(path.to_str().unwrap().trim_end_matches('/')) + "/" + p;
        info!("dealt with full path: {}", fpath.as_str());
        fpath
    }

    /// Find inode under current inode by name
    #[allow(unused)]
    pub fn find(&self, name: &str) -> Option<Arc<Inode>> {
        let file = self.0.borrow_mut();
        info!("find name: {} in {}", name, file.get_path().to_str().unwrap());
        let (names, inode_type) = file.lwext4_dir_entries().unwrap();
        info!("out lwext4_dir_entries");
        let mut name_iter = names.iter();
        let mut inode_type_iter = inode_type.iter();

        info!("into while");
        while let Some(iname) = name_iter.next() {
            let itypes = inode_type_iter.next();
            info!("iname: {}", core::str::from_utf8(iname).unwrap());
            if core::str::from_utf8(iname).unwrap().trim_end_matches('\0') == name {
                info!("find {} success", name);

                // lwext4 needs full path
                let full_path = String::from(file.get_path().to_str().unwrap().trim_end_matches('/')) + "/" + name;
                return Some(Arc::new(Inode::new(full_path.as_str(), itypes.unwrap().clone())));
            }
        }

        info!("find {} failed", name);
        None
    }

    /// Look up the node with given `name` in the directory
    /// Return the node if found.
    pub fn lookup(&self, name: &str) -> Option<Arc<Inode>> {
        let mut file = self.0.borrow_mut();
        
        let full_path = String::from(file.get_path().to_str().unwrap().trim_end_matches('/')) + "/" + name;
        
        if file.check_inode_exist(full_path.as_str(), InodeTypes::EXT4_DE_REG_FILE) {
            info!("lookup {} success", name);
            return Some(Arc::new(Inode::new(full_path.as_str(), InodeTypes::EXT4_DE_REG_FILE)));
        }

        // todo!: add support for directory

        info!("lookup {} failed", name);
        None
    }

    /// list all files' name in the directory
    #[allow(unused)]
    pub fn ls(&self) -> Vec<String> {
        let file = self.0.borrow_mut();

        if file.get_type() != InodeTypes::EXT4_DE_DIR {
            info!("not a directory");
        }

        let (name, inode_type) = match file.lwext4_dir_entries() {
            Ok((name, inode_type)) => (name, inode_type),
            Err(e) => {
                panic!("error when ls: {}", e);
            }
        };

        let mut name_iter = name.iter();
        let  _inode_type_iter = inode_type.iter();

        let mut names = Vec::new();
        while let Some(iname) = name_iter.next() {
            names.push(String::from(core::str::from_utf8(iname).unwrap()));
        }
        names
    }

    /// Read data from inode at offset
    pub fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result<usize, i32> {
        debug!("To read_at {}, buf len={}", offset, buf.len());
        let mut file = self.0.borrow_mut();
        let path = file.get_path();
        let path = path.to_str().unwrap();
        file.file_open(path, O_RDONLY)?;

        file.file_seek(offset as i64, SEEK_SET)?;
        let r = file.file_read(buf);

        let _ = file.file_close();
        r
    }

    /// Write data to inode at offset
    pub fn write_at(&self, offset: usize, buf: &[u8]) -> Result<usize, i32> {
        debug!("To write_at {}, buf len={}", offset, buf.len());
        let mut file = self.0.borrow_mut();
        let path = file.get_path();
        let path = path.to_str().unwrap();
        file.file_open(path, O_RDWR)?;

        file.file_seek(offset as i64, SEEK_SET)?;
        let r = file.file_write(buf);

        let _ = file.file_close();
        r
    }

    /// Truncate the inode to the given size
    pub fn truncate(&self, size: u64) -> Result<usize, i32> {
        info!("truncate file to size={}", size);
        let mut file = self.0.borrow_mut();
        let path = file.get_path();
        let path = path.to_str().unwrap();
        file.file_open(path, O_RDWR)?;

        let t = file.file_truncate(size);

        let _ = file.file_close();
        t
    }

    /// Create a new inode and return the inode
    pub fn create(&self, path: &str, ty: InodeTypes) -> Option<Arc<Inode>> {
        info!("create {:?} on Ext4fs: {}", ty, path);
        let fpath = self.path_deal_with(path);
        let fpath = fpath.as_str();
        if fpath.is_empty() {
            info!("given path is empty");
            return None;
        }

        let types = ty;

        let mut file = self.0.borrow_mut();

        let result = if file.check_inode_exist(fpath, types.clone()) {
            info!("inode already exists");
            Ok(0)
        } else {
            if types == InodeTypes::EXT4_DE_DIR {
                file.dir_mk(fpath)
            } else {
                file.file_open(fpath, O_WRONLY | O_CREAT | O_TRUNC)
                    .expect("create file failed");
                file.file_close()
            }
        };

        match result {
            Err(e) => {
                error!("create inode failed: {}", e);
                None
            }
            Ok(_) => {
                info!("create inode success");
                Some(Arc::new(Inode::new(fpath, types)))
            }
        }
    }

    /// Remove the inode
    #[allow(unused)]
    fn remove(&self, path: &str) -> Result<usize, i32> {
        info!("remove ext4fs: {}", path);
        let fpath = self.path_deal_with(path);
        let fpath = fpath.as_str();

        assert!(!fpath.is_empty()); // already check at `root.rs`

        let mut file = self.0.borrow_mut();
        if file.check_inode_exist(fpath, InodeTypes::EXT4_DE_DIR) {
            // Recursive directory remove
            file.dir_rm(fpath)
        } else {
            file.file_remove(fpath)
        }
    }

    /// Get the parent directory of this directory.
    /// Return `None` if the node is a file.
    #[allow(unused)]
    fn parent(&self) -> Option<Arc<Inode>> {
        let file = self.0.borrow_mut();
        if file.get_type() == InodeTypes::EXT4_DE_DIR {
            let path = file.get_path();
            let path = path.to_str().unwrap();
            info!("Get the parent dir of {}", path);
            let path = path.trim_end_matches('/').trim_end_matches(|c| c != '/');
            if !path.is_empty() {
                return Some(Arc::new(Self::new(path, InodeTypes::EXT4_DE_DIR)));
            }
        }
        None
    }

    /// Rename the inode
    #[allow(unused)]
    fn rename(&self, src_path: &str, dst_path: &str) -> Result<usize, i32> {
        info!("rename from {} to {}", src_path, dst_path);
        let mut file = self.0.borrow_mut();
        file.file_rename(src_path, dst_path)
    }
}

impl Drop for Inode {
    fn drop(&mut self) {
        let mut file = self.0.borrow_mut();
        info!("Drop struct Inode {:?}", file.get_path());
        file.file_close().expect("failed to close fd");
        drop(file); // todo
    }
}

我们还需要实现文件系统对象的包装：

/// The Ext4 filesystem: the lwext4 block-device wrapper plus a handle to the
/// root directory.
#[allow(dead_code)]
pub struct Ext4FileSystem {
    /// lwext4 wrapper that owns the disk.
    inner: Ext4BlockWrapper<Disk>,
    /// Inode handle for `/`.
    root: Arc<Inode>,
}

// SAFETY: NOTE(review): no synchronisation is visible here; these impls are
// presumably sound only while the filesystem is used from one hart at a
// time — confirm.
unsafe impl Sync for Ext4FileSystem {}
unsafe impl Send for Ext4FileSystem {}

impl Ext4FileSystem {
    /// Create a new Ext4 filesystem on top of `disk`.
    ///
    /// # Panics
    /// Panics if lwext4 fails to initialise the filesystem on the disk.
    pub fn new(disk: Disk) -> Self {
        info!(
            "Got Disk size:{}, position:{}",
            disk.size(),
            disk.position()
        );
        // The block wrapper is built before the root inode is created.
        // NOTE(review): presumably the filesystem must be mounted before any
        // path can be opened — keep this order.
        let inner = Ext4BlockWrapper::<Disk>::new(disk)
            .expect("failed to initialize EXT4 filesystem");
        let root = Arc::new(Inode::new("/", InodeTypes::EXT4_DE_DIR));
        Self { inner, root }
    }

    /// Get the root directory (a new `Arc` handle to the same inode).
    pub fn root_dir(&self) -> Arc<Inode> {
        info!("trying to get the root dir");
        Arc::clone(&self.root)
    }
}

OSInode

接着还需要对 OSInode（即内核操纵的文件句柄）进行一点点的改动，用于维护生命周期，避免发生访存错误。

// in os/fs/inode.rs

// Lazily initialised globals. `ROOT_INODE` is derived from `EXT4_FS`, so the
// first access to it also forces the filesystem to be created.
lazy_static! {
    /// ext4 file system, built on a `Disk` over the global `BLOCK_DEVICE`
    pub static ref EXT4_FS: Arc<Ext4FileSystem> = Arc::new(Ext4FileSystem::new(Disk::new(BLOCK_DEVICE.clone())));

    /// root inode
    pub static ref ROOT_INODE: Arc<Inode> = EXT4_FS.root_dir();
}

基本完成了对文件系统的接入。还需要改一下 Makefile ，但这部分比较直观就不写了（）

debug

看起来了解思路之后实现挺简单，为什么花了 3 天来接入？似乎大部分时间是在 debug，这部分会介绍我遇到的 tricky 的 bug 以及解决过程。

加载用户程序

当成功把用户程序（这里指官方的测试用例）的 elf 文件解析并执行时，在通过用户的 lib 进入 main 函数时，会引起 page fault

解决过程:

研究测例启动过程：使用 gdb 会发现在 __start_main 第一条语句出问题。我们给用户程序传入的 sp 是栈顶位置，再往上就是非法访问。然而这个语句试图直接访问 sp 指向的地址，这 8 个字节的方向是与栈增长方向相反的，所以出现了访存错误。

# User program entry point: hands the initial stack pointer to __start_main.
.section .text.entry
.globl _start
_start:
    mv a0, sp            # a0 (first argument) = initial stack pointer
    tail __start_main    # tail call: control does not come back to _start

/*
 * C-level entry: `p` is the value _start passed in a0, i.e. the initial stack
 * pointer. The word at p[0] is read as argc and the words after it as argv,
 * so the memory at and above the initial sp must be readable.
 */
int __start_main(long *p)
{
	int argc = p[0];              /* first word at the initial sp */
	char **argv = (void *)(p+1);  /* argument pointers follow directly */

	exit(main(argc, argv));
	return 0;                     /* only reached if exit() returns */
}

初步修改：目前的猜测是，测试用例认为我们会给用户程序传参数，所以会试着读取参数。但OS 现阶段还不支持传参，所以我们只是简单在内核中，把栈顶位置扩大 8 个字节，同时传给用户和之前一样的 sp。

死机

在正确实现了文件系统的接入后（修复所有编译构建问题、修复由于 c 字符串和 rust 字符串差异引起的问题），遇到了非常灵异的问题：OS可以顺利从磁盘中读出第一个应用并执行，但会在第一个应用exec第二个应用时卡住。修改函数/添加输出，无论如何总会在一个不明地方卡住。

感谢 crw 师兄，以下的调试思路大部分由他提供：

在 gdb 的页面中，先 continue，让内核直接跑到卡住的地方
ctrl + c 停止 gdb 内程序的运行，这时可以查看停在哪里
发现最后是停在了 trap_from_kernel ，查看此时的 scause = 15，是访存问题，但是为什么没有触发这个函数内的panic呢？接着查看 sepc = trap_from_kernel + 4

这时，大概猜了出来卡住的直接原因：

内核里某个函数触发了 trap_from_kernel，而 trap_from_kernel 的第一条指令会再次触发异常跳到 trap_from_kernel，死循环

查看 backtrace：我们的调用栈比较长：

考虑到：内核栈只有两页、debug模式对于指令没有优化，判断是爆栈。试着把内核栈的大小改成 10 页，顺利修复死机的问题！

回过头来看，可以比较清晰地知道发生了什么：

内核调用读写函数
函数调用链较长导致爆栈，触发异常，跳转到 trap_from_kernel
然而 trap_from_kernel 本身也需要栈空间，于是在执行第一条指令（使用栈空间）时触发异常
又跳转到了 trap_from_kernel，导致死循环

这就是为什么 OS 卡住了还输出不了 panic 信息的原因！

#Operating-System

内核赛周记1

https://pactheman123.github.io/2025/02/27/内核赛周记1/

作者

Xiaopac

发布于

2025年2月27日

许可协议

RCore-霸王龙下一篇