diff --git a/README.md b/README.md
index 13700c8..a6653e2 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,7 @@ rustc 1.56.0-nightly (08095fc1f 2021-07-26)
 3. Once you have a rough grasp of this project, go further by understanding and improving zCore itself, gaining real insight into new operating systems such as zCore and strengthening your own practical skills
 
+
 #### Notes on code/ch04-xx
 - Recommended way to run: in the `ch04-0x` directory: `RUST_LOG=info cargo run -p zircon-loader -- /prebuilt/zircon/x64`
 - ch4 runs the userboot program from the zircon prebuilt; see the [userboot source](https://github.com/vsrinivas/fuchsia/tree/master/zircon/kernel/lib/userabi/userboot) and the [fuchsia boot flow](https://fuchsia.dev/fuchsia-src/concepts/booting/userboot?hl=en) for details.
@@ -65,6 +66,7 @@ rustc 1.56.0-nightly (08095fc1f 2021-07-26)
 - `ch04-03` implements some of the syscalls related to `channel` and `debuglog`; it executes 3 syscalls and then exits, because process_exit is not supported.
 
+
 ## References
 - https://fuchsia.dev/
diff --git a/docs/.gitignore b/docs/.gitignore
index 7585238..fcbea52 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1 +1,2 @@
 book
+.DS_Store
\ No newline at end of file
diff --git a/docs/src/ch03-01-zircon-memory.md b/docs/src/ch03-01-zircon-memory.md
index 146173c..711e72c 100644
--- a/docs/src/ch03-01-zircon-memory.md
+++ b/docs/src/ch03-01-zircon-memory.md
@@ -1 +1,2 @@
 # The Zircon memory management model
+Zircon has two objects related to memory management: the VMO (Virtual Memory Object) and the VMAR (Virtual Memory Address Region). A VMO manages physical memory pages, while a VMAR manages a process's virtual address space. Whenever a process needs memory, a VMO is created and then mapped into a VMAR.
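+A minimal sketch of that flow, using the constructors implemented in the following chapters (the mapping step itself, `VmMapping`, is the subject of ch03-04):
+
+```rust
+// create a VMO backed by 4 physical pages (ch03-02/ch03-03)
+let vmo = VmObject::new_paged(4);
+// create the root VMAR of a process's address space (ch03-04)
+let vmar = VmAddressRegion::new_root();
+// a VmMapping (ch03-04) then ties the two together: it commits the
+// VMO's pages and installs them in the VMAR's page table
+```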
diff --git a/docs/src/ch03-02-vmo.md b/docs/src/ch03-02-vmo.md
index 9da0eae..491ff10 100644
--- a/docs/src/ch03-02-vmo.md
+++ b/docs/src/ch03-02-vmo.md
@@ -14,6 +14,188 @@
 > Implement the VmObject struct, defining the VmObjectTrait interface, and provide three concrete implementations: Paged, Physical and Slice
 
+The VmObject struct:
+
+```rust
+// vm/vmo/mod.rs
+pub struct VmObject {
+    base: KObjectBase,
+    resizable: bool,
+    trait_: Arc<dyn VMObjectTrait>,
+    inner: Mutex<VmObjectInner>,
+}
+
+impl_kobject!(VmObject);
+
+#[derive(Default)]
+struct VmObjectInner {
+    parent: Weak<VmObject>,
+    children: Vec<Weak<VmObject>>,
+    mapping_count: usize,
+    content_size: usize,
+}
+```
+`trait_` points to an object implementing VMObjectTrait, which has three concrete implementations: VMObjectPaged, VMObjectPhysical and VMObjectSlice. VMObjectPaged allocates memory page by page, VMObjectSlice is mainly used for shared memory, and VMObjectPhysical is not used in zCore-Tutorial for the time being.
+`mapping_count` is the number of VMARs this VmObject has been mapped into.
+`content_size` is the size of the allocated physical memory.
+VmObjectTrait defines a set of methods shared by all the VMObject* implementations:
+```rust
+pub trait VMObjectTrait: Sync + Send {
+    /// Read memory to `buf` from VMO at `offset`.
+    fn read(&self, offset: usize, buf: &mut [u8]) -> ZxResult;
+
+    /// Write memory from `buf` to VMO at `offset`.
+    fn write(&self, offset: usize, buf: &[u8]) -> ZxResult;
+
+    /// Resets the range of bytes in the VMO from `offset` to `offset+len` to 0.
+    fn zero(&self, offset: usize, len: usize) -> ZxResult;
+
+    /// Get the length of VMO.
+    fn len(&self) -> usize;
+
+    /// Set the length of VMO.
+    fn set_len(&self, len: usize) -> ZxResult;
+
+    /// Commit a page.
+    fn commit_page(&self, page_idx: usize, flags: MMUFlags) -> ZxResult<PhysAddr>;
+
+    /// Commit pages with an external function f.
+    /// The vmo is internally locked before it calls f,
+    /// allowing `VmMapping` to avoid deadlock.
+    fn commit_pages_with(
+        &self,
+        f: &mut dyn FnMut(&mut dyn FnMut(usize, MMUFlags) -> ZxResult<PhysAddr>) -> ZxResult,
+    ) -> ZxResult;
+
+    /// Commit allocating physical memory.
+    fn commit(&self, offset: usize, len: usize) -> ZxResult;
+
+    /// Decommit allocated physical memory.
+    fn decommit(&self, offset: usize, len: usize) -> ZxResult;
+
+    /// Create a child VMO.
+    fn create_child(&self, offset: usize, len: usize) -> ZxResult<Arc<dyn VMObjectTrait>>;
+
+    /// Append a mapping to the VMO's mapping list.
+    fn append_mapping(&self, _mapping: Weak<VmMapping>) {}
+
+    /// Remove a mapping from the VMO's mapping list.
+    fn remove_mapping(&self, _mapping: Weak<VmMapping>) {}
+
+    /// Complete the VmoInfo.
+    fn complete_info(&self, info: &mut VmoInfo);
+
+    /// Get the cache policy.
+    fn cache_policy(&self) -> CachePolicy;
+
+    /// Set the cache policy.
+    fn set_cache_policy(&self, policy: CachePolicy) -> ZxResult;
+
+    /// Count committed pages of the VMO.
+    fn committed_pages_in_range(&self, start_idx: usize, end_idx: usize) -> usize;
+
+    /// Pin the given range of the VMO.
+    fn pin(&self, _offset: usize, _len: usize) -> ZxResult {
+        Err(ZxError::NOT_SUPPORTED)
+    }
+
+    /// Unpin the given range of the VMO.
+    fn unpin(&self, _offset: usize, _len: usize) -> ZxResult {
+        Err(ZxError::NOT_SUPPORTED)
+    }
+
+    /// Returns true if the object is backed by a contiguous range of physical memory.
+    fn is_contiguous(&self) -> bool {
+        false
+    }
+
+    /// Returns true if the object is backed by RAM.
+    fn is_paged(&self) -> bool {
+        false
+    }
+}
+```
+`read()` and `write()` read and write the VMO, and `zero()` clears a range of memory.
+Worth special attention are `fn commit_page(&self, page_idx: usize, flags: MMUFlags) -> ZxResult<PhysAddr>`, `fn commit(&self, offset: usize, len: usize) -> ZxResult` and `fn commit_pages_with(...)`: they are responsible for allocating physical memory. Under some allocation policies, physical memory is not necessarily allocated up front, so a commit is needed to actually materialize a block of memory.
+Here, `pin` and `unpin` are mainly used to increase and decrease a reference count.
+VmObject provides several `new` constructors, which differ only in the concrete object stored in `trait_`:
+```rust
+impl VmObject {
+    /// Create a new VMO backing on physical memory allocated in pages.
+    pub fn new_paged(pages: usize) -> Arc<Self> {
+        Self::new_paged_with_resizable(false, pages)
+    }
+
+    /// Create a new VMO, which can be resizable, backing on physical memory allocated in pages.
+    pub fn new_paged_with_resizable(resizable: bool, pages: usize) -> Arc<Self> {
+        let base = KObjectBase::new();
+        Arc::new(VmObject {
+            resizable,
+            trait_: VMObjectPaged::new(pages),
+            inner: Mutex::new(VmObjectInner::default()),
+            base,
+        })
+    }
+
+    /// Create a new VMO representing a piece of contiguous physical memory.
+    pub fn new_physical(paddr: PhysAddr, pages: usize) -> Arc<Self> {
+        Arc::new(VmObject {
+            base: KObjectBase::new(),
+            resizable: false,
+            trait_: VMObjectPhysical::new(paddr, pages),
+            inner: Mutex::new(VmObjectInner::default()),
+        })
+    }
+
+    /// Create a VM object referring to a specific contiguous range of physical frames.
+    pub fn new_contiguous(pages: usize, align_log2: usize) -> ZxResult<Arc<Self>> {
+        let vmo = Arc::new(VmObject {
+            base: KObjectBase::new(),
+            resizable: false,
+            trait_: VMObjectPaged::new_contiguous(pages, align_log2)?,
+            inner: Mutex::new(VmObjectInner::default()),
+        });
+        Ok(vmo)
+    }
+}
+```
+`pub fn create_child(self: &Arc<Self>, resizable: bool, offset: usize, len: usize)` creates a snapshot copy of a VmObject:
+```rust
+impl VmObject {
+    /// Create a child VMO.
+    pub fn create_child(
+        self: &Arc<Self>,
+        resizable: bool,
+        offset: usize,
+        len: usize,
+    ) -> ZxResult<Arc<Self>> {
+        // Create the child VmObject
+        let base = KObjectBase::with_name(&self.base.name());
+        let trait_ = self.trait_.create_child(offset, len)?;
+        let child = Arc::new(VmObject {
+            base,
+            resizable,
+            trait_,
+            inner: Mutex::new(VmObjectInner {
+                parent: Arc::downgrade(self),
+                ..VmObjectInner::default()
+            }),
+        });
+        // Register the child with this VmObject
+        self.add_child(&child);
+        Ok(child)
+    }
+
+    /// Add a child to the children list.
+    fn add_child(&self, child: &Arc<Self>) {
+        let mut inner = self.inner.lock();
+        // Drop child entries whose VmObject no longer exists,
+        // judged by the strong reference count
+        inner.children.retain(|x| x.strong_count() != 0);
+        // downgrade converts an Arc into a Weak
+        inner.children.push(Arc::downgrade(child));
+    }
+}
+```
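+The children list holds `Weak` references so that a parent never keeps its children alive, and dead entries are purged by the `retain` call above. A minimal std-only illustration of this idiom (test-style sketch):
+
+```rust
+use std::sync::{Arc, Weak};
+
+#[test]
+fn weak_children() {
+    let a = Arc::new(42);
+    let w: Weak<i32> = Arc::downgrade(&a); // does not increase the strong count
+    assert_eq!(w.strong_count(), 1);
+    drop(a);                               // the last strong reference is gone
+    assert_eq!(w.strong_count(), 0);       // retain(|x| x.strong_count() != 0) removes it
+}
+```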
 
 ## HAL: simulating physical memory with a file
 
 > A first look at mmap, leading to the idea of using a file to simulate physical memory
 
@@ -21,11 +21,203 @@
 > Create a file and map it linearly into the process address space with mmap
 >
 > Implement pmem_read, pmem_write
+### mmap
+mmap is a method of memory-mapping files: a file (or another object) is mapped into a process's address space, establishing a one-to-one correspondence between positions in the on-disk file and a range of virtual addresses in the process. Once the mapping exists, the process can read and write that memory range through ordinary pointers, and the system automatically writes dirty pages back to the corresponding file on disk, so the file is updated without calling read, write or other syscalls. Conversely, kernel-space modifications of the region are directly visible in user space, which also allows file sharing between processes. Creating a new file and calling mmap on it is therefore effectively allocating a block of physical memory, which is why we can use a file to simulate physical memory.
+![mmap.png](img/mmap.png)
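+
+As a standalone illustration of this idea (host-side Rust using the `libc` crate; error handling elided, file path is arbitrary), a write through the mapped pointer ends up in the file:
+
+```rust
+use std::{fs::OpenOptions, os::unix::io::AsRawFd};
+
+fn main() {
+    let file = OpenOptions::new()
+        .read(true).write(true).create(true)
+        .open("/tmp/pmem-demo").unwrap();
+    file.set_len(4096).unwrap();
+    let ptr = unsafe {
+        libc::mmap(
+            std::ptr::null_mut(),             // let the kernel pick an address
+            4096,
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_SHARED,                 // propagate writes back to the file
+            file.as_raw_fd(),
+            0,
+        )
+    } as *mut u8;
+    unsafe { ptr.write(42) };                 // a memory write...
+    // ...is now a file write: byte 0 of /tmp/pmem-demo is 42
+}
+```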
+#[export_name = "hal_pmem_write"] +pub fn pmem_write(paddr: PhysAddr, buf: &[u8]) { + trace!("pmem write: paddr={:#x}, len={:#x}", paddr, buf.len()); + assert!(paddr + buf.len() <= PMEM_SIZE); + ensure_mmap_pmem(); + unsafe { + buf.as_ptr() + .copy_to_nonoverlapping(phys_to_virt(paddr) as _, buf.len()); + } +} + +/// Ensure physical memory are mmapped and accessible. +fn ensure_mmap_pmem() { + FRAME_FILE.as_raw_fd(); +} +``` +`ensure_mmap_pmem()` 确保物理内存已经映射 +`copy_to_nonoverlapping(self, dst *mut T, count: usize)` 将 self 的字节序列拷贝到 dst 中,source 和 destination 是不互相重叠的。`(phys_to_virt(paddr) as *const u8).copy_to_nonoverlapping(buf.as_mut_ptr(), buf.len());` 通过 `phys_to_virt(paddr)` 将 paddr 加上 PMEM_BASE 转为虚拟地址,然后将里面的字节拷贝到 buf 里面。 ## 实现物理内存 VMO > 用 HAL 实现 VmObjectPhysical 的方法,并做单元测试 +物理内存 VMO 结构体: +```rust +pub struct VMObjectPhysical { + paddr: PhysAddr, + pages: usize, + /// Lock this when access physical memory. + data_lock: Mutex<()>, + inner: Mutex, +} +struct VMObjectPhysicalInner { + cache_policy: CachePolicy, +} +``` +这里比较奇怪的是 data_lock 这个字段,这个字段里 Mutex 的泛型类型是一个 unit type,其实相当于它是没有“值”的,它只是起到一个锁的作用。 +```rust +impl VMObjectTrait for VMObjectPhysical { + fn read(&self, offset: usize, buf: &mut [u8]) -> ZxResult { + let _ = self.data_lock.lock(); // 先获取锁 + assert!(offset + buf.len() <= self.len()); + kernel_hal::pmem_read(self.paddr + offset, buf); // 对一块物理内存进行读 + Ok(()) + } +} +``` ## 实现切片 VMO > 实现 VmObjectSlice,并做单元测试 +VMObjectSlice 中的 parent 用于指向一个实际的 VMO 对象,比如:VMObjectPaged,这样通过 VMObjectSlice 就可以实现对 VMObjectPaged 的共享。 +```rust +pub struct VMObjectSlice { + /// Parent node. + parent: Arc, + /// The offset from parent. + offset: usize, + /// The size in bytes. + size: usize, +} + +impl VMObjectSlice { + pub fn new(parent: Arc, offset: usize, size: usize) -> Arc { + Arc::new(VMObjectSlice { + parent, + offset, + size, + }) + } + + fn check_range(&self, offset: usize, len: usize) -> ZxResult { + if offset + len >= self.size { + return Err(ZxError::OUT_OF_RANGE); + } + Ok(()) + } +} +``` +VMObjectSlice 实现的读写,第一步是 `check_range` ,第二步是调用 parent 中的读写方法。 +```rust +impl VMObjectTrait for VMObjectSlice { + fn read(&self, offset: usize, buf: &mut [u8]) -> ZxResult { + self.check_range(offset, buf.len())?; + self.parent.read(offset + self.offset, buf) + } +} +``` \ No newline at end of file diff --git a/docs/src/ch03-03-vmo-paged.md b/docs/src/ch03-03-vmo-paged.md index 58c3a7a..a27cc0b 100644 --- a/docs/src/ch03-03-vmo-paged.md +++ b/docs/src/ch03-03-vmo-paged.md @@ -7,18 +7,399 @@ > > 介绍 commit 操作的意义和作用 +commit_page 和 commit_pages_with 函数的作用:用于检查物理页帧是否已经分配。 + ## HAL:物理内存管理 > 在 HAL 中实现 PhysFrame 和最简单的分配器 +### kernel-hal +```rust +#[repr(C)] +pub struct PhysFrame { + // paddr 物理地址 + paddr: PhysAddr, +} + +impl PhysFrame { + // 分配物理页帧 + #[linkage = "weak"] + #[export_name = "hal_frame_alloc"] + pub fn alloc() -> Option { + unimplemented!() + } + + #[linkage = "weak"] + #[export_name = "hal_frame_alloc_contiguous"] + pub fn alloc_contiguous_base(_size: usize, _align_log2: usize) -> Option { + unimplemented!() + } + + pub fn alloc_contiguous(size: usize, align_log2: usize) -> Vec { + PhysFrame::alloc_contiguous_base(size, align_log2).map_or(Vec::new(), |base| { + (0..size) + .map(|i| PhysFrame { + paddr: base + i * PAGE_SIZE, + }) + .collect() + }) + } + + pub fn alloc_zeroed() -> Option { + Self::alloc().map(|f| { + pmem_zero(f.addr(), PAGE_SIZE); + f + }) + } + + pub fn alloc_contiguous_zeroed(size: usize, align_log2: usize) -> Vec { + PhysFrame::alloc_contiguous_base(size, 
 
 ## Implementing the physical memory VMO
 
 > Implement the methods of VmObjectPhysical with the HAL, with unit tests
 
+The physical memory VMO struct:
+```rust
+pub struct VMObjectPhysical {
+    paddr: PhysAddr,
+    pages: usize,
+    /// Lock this when accessing physical memory.
+    data_lock: Mutex<()>,
+    inner: Mutex<VMObjectPhysicalInner>,
+}
+
+struct VMObjectPhysicalInner {
+    cache_policy: CachePolicy,
+}
+```
+The `data_lock` field may look odd: the generic parameter of its Mutex is the unit type, so it carries no value at all and serves purely as a lock:
+```rust
+impl VMObjectTrait for VMObjectPhysical {
+    fn read(&self, offset: usize, buf: &mut [u8]) -> ZxResult {
+        let _ = self.data_lock.lock(); // take the lock first
+        assert!(offset + buf.len() <= self.len());
+        kernel_hal::pmem_read(self.paddr + offset, buf); // then read the physical memory
+        Ok(())
+    }
+}
+```
 
 ## Implementing the slice VMO
 
 > Implement VmObjectSlice, with unit tests
 
+The `parent` field of a VMObjectSlice points to an actual VMO object such as a VMObjectPaged, so a VMObjectSlice can be used to share a VMObjectPaged:
+```rust
+pub struct VMObjectSlice {
+    /// Parent node.
+    parent: Arc<dyn VMObjectTrait>,
+    /// The offset from parent.
+    offset: usize,
+    /// The size in bytes.
+    size: usize,
+}
+
+impl VMObjectSlice {
+    pub fn new(parent: Arc<dyn VMObjectTrait>, offset: usize, size: usize) -> Arc<Self> {
+        Arc::new(VMObjectSlice {
+            parent,
+            offset,
+            size,
+        })
+    }
+
+    fn check_range(&self, offset: usize, len: usize) -> ZxResult {
+        if offset + len > self.size {
+            return Err(ZxError::OUT_OF_RANGE);
+        }
+        Ok(())
+    }
+}
+```
+Reads and writes on a VMObjectSlice first call `check_range`, then forward to the parent's read or write method with the slice offset added:
+```rust
+impl VMObjectTrait for VMObjectSlice {
+    fn read(&self, offset: usize, buf: &mut [u8]) -> ZxResult {
+        self.check_range(offset, buf.len())?;
+        self.parent.read(offset + self.offset, buf)
+    }
+}
+```
\ No newline at end of file
diff --git a/docs/src/ch03-03-vmo-paged.md b/docs/src/ch03-03-vmo-paged.md
index 58c3a7a..a27cc0b 100644
--- a/docs/src/ch03-03-vmo-paged.md
+++ b/docs/src/ch03-03-vmo-paged.md
@@ -7,18 +7,399 @@
 >
 > Introduce the meaning and purpose of the commit operation
 
+The role of the commit_page and commit_pages_with functions: they are used to check that the physical page frames have actually been allocated.
+
 ## HAL: physical memory management
 
 > Implement PhysFrame and the simplest possible allocator in the HAL
 
+### kernel-hal
+```rust
+#[repr(C)]
+pub struct PhysFrame {
+    // the physical address of the frame
+    paddr: PhysAddr,
+}
+
+impl PhysFrame {
+    // Allocate a physical page frame
+    #[linkage = "weak"]
+    #[export_name = "hal_frame_alloc"]
+    pub fn alloc() -> Option<Self> {
+        unimplemented!()
+    }
+
+    #[linkage = "weak"]
+    #[export_name = "hal_frame_alloc_contiguous"]
+    pub fn alloc_contiguous_base(_size: usize, _align_log2: usize) -> Option<PhysAddr> {
+        unimplemented!()
+    }
+
+    pub fn alloc_contiguous(size: usize, align_log2: usize) -> Vec<Self> {
+        PhysFrame::alloc_contiguous_base(size, align_log2).map_or(Vec::new(), |base| {
+            (0..size)
+                .map(|i| PhysFrame {
+                    paddr: base + i * PAGE_SIZE,
+                })
+                .collect()
+        })
+    }
+
+    pub fn alloc_zeroed() -> Option<Self> {
+        Self::alloc().map(|f| {
+            pmem_zero(f.addr(), PAGE_SIZE);
+            f
+        })
+    }
+
+    pub fn alloc_contiguous_zeroed(size: usize, align_log2: usize) -> Vec<Self> {
+        PhysFrame::alloc_contiguous_base(size, align_log2).map_or(Vec::new(), |base| {
+            pmem_zero(base, size * PAGE_SIZE);
+            (0..size)
+                .map(|i| PhysFrame {
+                    paddr: base + i * PAGE_SIZE,
+                })
+                .collect()
+        })
+    }
+
+    pub fn addr(&self) -> PhysAddr {
+        self.paddr
+    }
+
+    #[linkage = "weak"]
+    #[export_name = "hal_zero_frame_paddr"]
+    pub fn zero_frame_addr() -> PhysAddr {
+        unimplemented!()
+    }
+}
+
+impl Drop for PhysFrame {
+    #[linkage = "weak"]
+    #[export_name = "hal_frame_dealloc"]
+    fn drop(&mut self) {
+        unimplemented!()
+    }
+}
+```
+### kernel-hal-unix
+The code below builds the pool of available frames: `(PAGE_SIZE..PMEM_SIZE).step_by(PAGE_SIZE).collect()` produces the start address of one frame every PAGE_SIZE bytes (page 0 is skipped; it serves as the zero frame, see `zero_frame_addr`).
+```rust
+lazy_static! {
+    static ref AVAILABLE_FRAMES: Mutex<VecDeque<usize>> =
+        Mutex::new((PAGE_SIZE..PMEM_SIZE).step_by(PAGE_SIZE).collect());
+}
+```
+Allocating a physical frame simply pops a frame address off AVAILABLE_FRAMES with pop_front:
+```rust
+impl PhysFrame {
+    #[export_name = "hal_frame_alloc"]
+    pub fn alloc() -> Option<Self> {
+        let ret = AVAILABLE_FRAMES
+            .lock()
+            .unwrap()
+            .pop_front()
+            .map(|paddr| PhysFrame { paddr });
+        trace!("frame alloc: {:?}", ret);
+        ret
+    }
+    #[export_name = "hal_zero_frame_paddr"]
+    pub fn zero_frame_addr() -> PhysAddr {
+        0
+    }
+}
+
+impl Drop for PhysFrame {
+    #[export_name = "hal_frame_dealloc"]
+    fn drop(&mut self) {
+        trace!("frame dealloc: {:?}", self);
+        AVAILABLE_FRAMES.lock().unwrap().push_back(self.paddr);
+    }
+}
+```
 
 ## Helper structure: the BlockRange iterator
 
 > Implement BlockRange
 
+The read and write methods of the page-allocated VMObjectPaged use a BlockIter iterator. BlockIter splits a memory range into blocks and yields the information of one block at a time as a BlockRange.
+### BlockIter
+```rust
+#[derive(Debug, Eq, PartialEq)]
+pub struct BlockRange {
+    pub block: usize,
+    pub begin: usize, // start position within the block
+    pub end: usize,   // end position within the block
+    pub block_size_log2: u8,
+}
+
+/// Given a range, iterate the sub-range within each block
+pub struct BlockIter {
+    pub begin: usize,
+    pub end: usize,
+    pub block_size_log2: u8,
+}
+```
+`block_size_log2` is the base-2 logarithm of the block size: for a block size of 4096, `block_size_log2` is 12. `block` is the block index.
+```rust
+impl BlockRange {
+    pub fn len(&self) -> usize {
+        self.end - self.begin
+    }
+    pub fn is_full(&self) -> bool {
+        self.len() == (1usize << self.block_size_log2)
+    }
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+    pub fn origin_begin(&self) -> usize {
+        (self.block << self.block_size_log2) + self.begin
+    }
+    pub fn origin_end(&self) -> usize {
+        (self.block << self.block_size_log2) + self.end
+    }
+}
+
+impl Iterator for BlockIter {
+    type Item = BlockRange;
+
+    fn next(&mut self) -> Option<<Self as Iterator>::Item> {
+        if self.begin >= self.end {
+            return None;
+        }
+        let block_size_log2 = self.block_size_log2;
+        let block_size = 1usize << self.block_size_log2;
+        let block = self.begin / block_size;
+        let begin = self.begin % block_size;
+        // Only the last block needs its in-block end position computed;
+        // every other block extends to the full block size
+        let end = if block == self.end / block_size {
+            self.end % block_size
+        } else {
+            block_size
+        };
+        self.begin += end - begin;
+        Some(BlockRange {
+            block,
+            begin,
+            end,
+            block_size_log2,
+        })
+    }
+}
+```
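+
+For example (test-style sketch), iterating the byte range `[0x800, 0x2000)` with 4 KiB blocks yields one sub-range per touched page:
+
+```rust
+#[test]
+fn block_iter_sub_ranges() {
+    let iter = BlockIter { begin: 0x800, end: 0x2000, block_size_log2: 12 };
+    let ranges: Vec<BlockRange> = iter.collect();
+    assert_eq!(ranges, vec![
+        BlockRange { block: 0, begin: 0x800, end: 0x1000, block_size_log2: 12 },
+        BlockRange { block: 1, begin: 0,     end: 0x1000, block_size_log2: 12 },
+    ]);
+}
+```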
 
 ## Implementing the page-allocated VMO
 
 > Implement the for_each_page, commit, read and write functions
 
+The page-allocated VMO struct looks like this:
+```rust
+pub struct VMObjectPaged {
+    inner: Mutex<VMObjectPagedInner>,
+}
+
+/// The mutable part of `VMObjectPaged`.
+#[derive(Default)]
+struct VMObjectPagedInner {
+    /// Physical frames of this VMO.
+    frames: Vec<PhysFrame>,
+    /// Cache Policy
+    cache_policy: CachePolicy,
+    /// Is contiguous
+    contiguous: bool,
+    /// Sum of pin_count
+    pin_count: usize,
+    /// All mappings to this VMO.
+    mappings: Vec<Weak<VmMapping>>,
+}
+```
+VMObjectPaged has two constructors:
+```rust
+impl VMObjectPaged {
+    /// Create a new VMO backing on physical memory allocated in pages.
+    pub fn new(pages: usize) -> Arc<Self> {
+        let mut frames = Vec::new();
+        // allocate `pages` frames and zero their memory
+        frames.resize_with(pages, || PhysFrame::alloc_zeroed().unwrap());
+        Arc::new(VMObjectPaged {
+            inner: Mutex::new(VMObjectPagedInner {
+                frames,
+                ..Default::default()
+            }),
+        })
+    }
+
+    /// Create a list of contiguous pages
+    pub fn new_contiguous(pages: usize, align_log2: usize) -> ZxResult<Arc<Self>> {
+        let frames = PhysFrame::alloc_contiguous_zeroed(pages, align_log2 - PAGE_SIZE_LOG2);
+        if frames.is_empty() {
+            return Err(ZxError::NO_MEMORY);
+        }
+        Ok(Arc::new(VMObjectPaged {
+            inner: Mutex::new(VMObjectPagedInner {
+                frames,
+                contiguous: true,
+                ..Default::default()
+            }),
+        }))
+    }
+}
+```
+Reads and writes on a VMObjectPaged rely on one very important helper, for_each_page. It first constructs a BlockIter, then invokes the given function on each in-page range to perform the read or write:
+```rust
+impl VMObjectPagedInner {
+    /// Helper function to split range into sub-ranges within pages.
+    ///
+    /// ```text
+    /// VMO range:
+    /// |----|----|----|----|----|
+    ///
+    /// buf:
+    ///            [====len====]
+    /// |--offset--|
+    ///
+    /// sub-ranges:
+    ///            [===]
+    ///                [====]
+    ///                     [==]
+    /// ```
+    ///
+    /// `f` is a function to process in-page ranges.
+    /// It takes 2 arguments:
+    /// * `paddr`: the start physical address of the in-page range.
+    /// * `buf_range`: the range in view of the input buffer.
+    fn for_each_page(
+        &mut self,
+        offset: usize,
+        buf_len: usize,
+        mut f: impl FnMut(PhysAddr, Range<usize>),
+    ) {
+        let iter = BlockIter {
+            begin: offset,
+            end: offset + buf_len,
+            block_size_log2: 12,
+        };
+        for block in iter {
+            // the physical address where this block starts
+            let paddr = self.frames[block.block].addr();
+            // the corresponding range of the buffer
+            let buf_range = block.origin_begin() - offset..block.origin_end() - offset;
+            f(paddr + block.begin, buf_range);
+        }
+    }
+}
+```
+The read and write functions pass `kernel_hal::pmem_read` and `kernel_hal::pmem_write` respectively as the per-page callback:
+```rust
+impl VMObjectTrait for VMObjectPaged {
+    fn read(&self, offset: usize, buf: &mut [u8]) -> ZxResult {
+        let mut inner = self.inner.lock();
+        if inner.cache_policy != CachePolicy::Cached {
+            return Err(ZxError::BAD_STATE);
+        }
+        inner.for_each_page(offset, buf.len(), |paddr, buf_range| {
+            kernel_hal::pmem_read(paddr, &mut buf[buf_range]);
+        });
+        Ok(())
+    }
+
+    fn write(&self, offset: usize, buf: &[u8]) -> ZxResult {
+        let mut inner = self.inner.lock();
+        if inner.cache_policy != CachePolicy::Cached {
+            return Err(ZxError::BAD_STATE);
+        }
+        inner.for_each_page(offset, buf.len(), |paddr, buf_range| {
+            kernel_hal::pmem_write(paddr, &buf[buf_range]);
+        });
+        Ok(())
+    }
+}
+```
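+
+A minimal round-trip check (test-style sketch under kernel-hal-unix); the range is chosen to straddle a page boundary so that for_each_page yields two sub-ranges:
+
+```rust
+#[test]
+fn paged_read_write() {
+    let vmo = VMObjectPaged::new(2);
+    let data = [0xAAu8; 4];
+    vmo.write(PAGE_SIZE - 2, &data).unwrap();
+    let mut buf = [0u8; 4];
+    vmo.read(PAGE_SIZE - 2, &mut buf).unwrap();
+    assert_eq!(buf, data);
+}
+```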
+The commit functions:
+```rust
+impl VMObjectTrait for VMObjectPaged {
+    fn commit_page(&self, page_idx: usize, _flags: MMUFlags) -> ZxResult<PhysAddr> {
+        let inner = self.inner.lock();
+        Ok(inner.frames[page_idx].addr())
+    }
+
+    fn commit_pages_with(
+        &self,
+        f: &mut dyn FnMut(&mut dyn FnMut(usize, MMUFlags) -> ZxResult<PhysAddr>) -> ZxResult,
+    ) -> ZxResult {
+        let inner = self.inner.lock();
+        f(&mut |page_idx, _| Ok(inner.frames[page_idx].addr()))
+    }
+}
+```
 
 ## VMO copying
 
 > Implement the create_child function
 
+create_child copies the contents of the original VMObjectPaged. The `VMObjectTrait` entry point checks alignment and delegates to an inner method on `VMObjectPagedInner`:
+```rust
+// object/vm/vmo/paged.rs
+
+impl VMObjectTrait for VMObjectPaged {
+    fn create_child(&self, offset: usize, len: usize) -> ZxResult<Arc<dyn VMObjectTrait>> {
+        assert!(page_aligned(offset));
+        assert!(page_aligned(len));
+        let mut inner = self.inner.lock();
+        let child = inner.create_child(offset, len)?;
+        Ok(child)
+    }
+}
+
+// The actual copying happens on the inner struct:
+impl VMObjectPagedInner {
+    /// Create a snapshot child VMO.
+    fn create_child(&mut self, offset: usize, len: usize) -> ZxResult<Arc<VMObjectPaged>> {
+        // cloning a contiguous vmo is no longer permitted
+        // https://fuchsia.googlesource.com/fuchsia/+/e6b4c6751bbdc9ed2795e81b8211ea294f139a45
+        if self.contiguous {
+            return Err(ZxError::INVALID_ARGS);
+        }
+        if self.cache_policy != CachePolicy::Cached || self.pin_count != 0 {
+            return Err(ZxError::BAD_STATE);
+        }
+        let mut frames = Vec::with_capacity(pages(len));
+        for _ in 0..pages(len) {
+            frames.push(PhysFrame::alloc().ok_or(ZxError::NO_MEMORY)?);
+        }
+        for (i, frame) in frames.iter().enumerate() {
+            if let Some(src_frame) = self.frames.get(pages(offset) + i) {
+                kernel_hal::frame_copy(src_frame.addr(), frame.addr())
+            } else {
+                kernel_hal::pmem_zero(frame.addr(), PAGE_SIZE);
+            }
+        }
+        // create the child VMO
+        let child = Arc::new(VMObjectPaged {
+            inner: Mutex::new(VMObjectPagedInner {
+                frames,
+                ..Default::default()
+            }),
+        });
+        Ok(child)
+    }
+}
+
+// kernel-hal-unix/src/lib.rs
+
+/// Copy content of `src` frame to `target` frame
+#[export_name = "hal_frame_copy"]
+pub fn frame_copy(src: PhysAddr, target: PhysAddr) {
+    trace!("frame_copy: {:#x} <- {:#x}", target, src);
+    assert!(src + PAGE_SIZE <= PMEM_SIZE && target + PAGE_SIZE <= PMEM_SIZE);
+    ensure_mmap_pmem();
+    unsafe {
+        let buf = phys_to_virt(src) as *const u8;
+        buf.copy_to_nonoverlapping(phys_to_virt(target) as _, PAGE_SIZE);
+    }
+}
+
+/// Zero physical memory at `[paddr, paddr + len)`
+#[export_name = "hal_pmem_zero"]
+pub fn pmem_zero(paddr: PhysAddr, len: usize) {
+    trace!("pmem_zero: addr={:#x}, len={:#x}", paddr, len);
+    assert!(paddr + len <= PMEM_SIZE);
+    ensure_mmap_pmem();
+    unsafe {
+        core::ptr::write_bytes(phys_to_virt(paddr) as *mut u8, 0, len);
+    }
+}
+```
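+
+A sketch of the snapshot semantics (test-style, under kernel-hal-unix): the child copies the parent's frames at creation time, so a later write to the parent is not visible through the child:
+
+```rust
+#[test]
+fn create_child_is_a_snapshot() {
+    let parent = VMObjectPaged::new(1);
+    parent.write(0, &[1, 2, 3, 4]).unwrap();
+    let child = parent.create_child(0, PAGE_SIZE).unwrap();
+    // a write to the parent after the copy is not seen by the child
+    parent.write(0, &[5, 6, 7, 8]).unwrap();
+    let mut buf = [0u8; 4];
+    child.read(0, &mut buf).unwrap();
+    assert_eq!(buf, [1, 2, 3, 4]);
+}
+```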
diff --git a/docs/src/ch03-04-vmar.md b/docs/src/ch03-04-vmar.md
index 5cf4539..8520fd6 100644
--- a/docs/src/ch03-04-vmar.md
+++ b/docs/src/ch03-04-vmar.md
@@ -10,10 +10,262 @@
 >
 > Implement the create_child, map, unmap and destroy functions, with unit tests verifying address space allocation
 
+### VmAddressRegion
+```rust
+pub struct VmAddressRegion {
+    flags: VmarFlags,
+    base: KObjectBase,
+    addr: VirtAddr,
+    size: usize,
+    parent: Option<Arc<VmAddressRegion>>,
+    page_table: Arc<Mutex<dyn PageTableTrait>>,
+    /// If inner is None, this region is destroyed, all operations are invalid.
+    inner: Mutex<Option<VmarInner>>,
+}
+
+#[derive(Default)]
+struct VmarInner {
+    children: Vec<Arc<VmAddressRegion>>,
+    mappings: Vec<Arc<VmMapping>>,
+}
+```
+Every process owns a root VMAR, constructed as follows:
+```rust
+impl VmAddressRegion {
+    /// Create a new root VMAR.
+    pub fn new_root() -> Arc<Self> {
+        let (addr, size) = {
+            use core::sync::atomic::*;
+            static VMAR_ID: AtomicUsize = AtomicUsize::new(0);
+            let i = VMAR_ID.fetch_add(1, Ordering::SeqCst);
+            (0x2_0000_0000 + 0x100_0000_0000 * i, 0x100_0000_0000)
+        };
+        Arc::new(VmAddressRegion {
+            flags: VmarFlags::ROOT_FLAGS,
+            base: KObjectBase::new(),
+            addr,
+            size,
+            parent: None,
+            page_table: Arc::new(Mutex::new(kernel_hal::PageTable::new())), // the HAL PageTable
+            inner: Mutex::new(Some(VmarInner::default())),
+        })
+    }
+}
+```
+Our kernel likewise needs a root VMAR of its own:
+```rust
+/// The base of kernel address space
+/// In x86 fuchsia this is 0xffff_ff80_0000_0000 instead
+pub const KERNEL_ASPACE_BASE: u64 = 0xffff_ff02_0000_0000;
+/// The size of kernel address space
+pub const KERNEL_ASPACE_SIZE: u64 = 0x0000_0080_0000_0000;
+/// The base of user address space
+pub const USER_ASPACE_BASE: u64 = 0;
+// pub const USER_ASPACE_BASE: u64 = 0x0000_0000_0100_0000;
+/// The size of user address space
+pub const USER_ASPACE_SIZE: u64 = (1u64 << 47) - 4096 - USER_ASPACE_BASE;
+
+impl VmAddressRegion {
+    /// Create a kernel root VMAR.
+    pub fn new_kernel() -> Arc<Self> {
+        let kernel_vmar_base = KERNEL_ASPACE_BASE as usize;
+        let kernel_vmar_size = KERNEL_ASPACE_SIZE as usize;
+        Arc::new(VmAddressRegion {
+            flags: VmarFlags::ROOT_FLAGS,
+            base: KObjectBase::new(),
+            addr: kernel_vmar_base,
+            size: kernel_vmar_size,
+            parent: None,
+            page_table: Arc::new(Mutex::new(kernel_hal::PageTable::new())),
+            inner: Mutex::new(Some(VmarInner::default())),
+        })
+    }
+}
+```
+### VmMapping
+VmMapping establishes the mapping between a VMO and a VMAR:
+```rust
+/// Virtual Memory Mapping
+pub struct VmMapping {
+    /// The permission limitation of the vmar
+    permissions: MMUFlags,
+    vmo: Arc<VmObject>,
+    page_table: Arc<Mutex<dyn PageTableTrait>>,
+    inner: Mutex<VmMappingInner>,
+}
+
+#[derive(Debug, Clone)]
+struct VmMappingInner {
+    /// The actual flags used in the mapping of each page
+    flags: Vec<MMUFlags>,
+    addr: VirtAddr,
+    size: usize,
+    vmo_offset: usize,
+}
+```
+map and unmap establish and tear down the mapping; the page-table calls are left as comments here and filled in at the end of this chapter:
+```rust
+impl VmMapping {
+    /// Map range and commit.
+    /// Commit pages to vmo, and map those to frames in page_table.
+    /// Temporarily used for development. A standard procedure for
+    /// vmo is: create_vmo, op_range(commit), map
+    fn map(self: &Arc<Self>) -> ZxResult {
+        self.vmo.commit_pages_with(&mut |commit| {
+            let inner = self.inner.lock();
+            let mut page_table = self.page_table.lock();
+            let page_num = inner.size / PAGE_SIZE;
+            let vmo_offset = inner.vmo_offset / PAGE_SIZE;
+            for i in 0..page_num {
+                let paddr = commit(vmo_offset + i, inner.flags[i])?;
+                // Establish the page-table mapping via PageTableTrait's hal_pt_map,
+                // i.e. call into kernel-hal (filled in below)
+            }
+            Ok(())
+        })
+    }
+
+    fn unmap(&self) {
+        let inner = self.inner.lock();
+        let pages = inner.size / PAGE_SIZE;
+        // TODO inner.vmo_offset unused?
+        // Tear down the mapping via kernel-hal (filled in below)
+    }
+}
+```
 
 ## HAL: simulating page tables with mmap
 
 > Implement the page table interface: map, unmap, protect
 
+kernel-hal defines a page table and the methods it supports:
+```rust
+/// Page Table
+#[repr(C)]
+pub struct PageTable {
+    table_phys: PhysAddr,
+}
+
+impl PageTable {
+    /// Get current page table
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_current"]
+    pub fn current() -> Self {
+        unimplemented!()
+    }
+
+    /// Create a new `PageTable`.
+    #[allow(clippy::new_without_default)]
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_new"]
+    pub fn new() -> Self {
+        unimplemented!()
+    }
+}
+
+impl PageTableTrait for PageTable {
+    /// Map the page of `vaddr` to the frame of `paddr` with `flags`.
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_map"]
+    fn map(&mut self, _vaddr: VirtAddr, _paddr: PhysAddr, _flags: MMUFlags) -> Result<()> {
+        unimplemented!()
+    }
+    /// Unmap the page of `vaddr`.
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_unmap"]
+    fn unmap(&mut self, _vaddr: VirtAddr) -> Result<()> {
+        unimplemented!()
+    }
+    /// Change the `flags` of the page of `vaddr`.
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_protect"]
+    fn protect(&mut self, _vaddr: VirtAddr, _flags: MMUFlags) -> Result<()> {
+        unimplemented!()
+    }
+    /// Query the physical address which the page of `vaddr` maps to.
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_query"]
+    fn query(&mut self, _vaddr: VirtAddr) -> Result<PhysAddr> {
+        unimplemented!()
+    }
+    /// Get the physical address of root page table.
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_table_phys"]
+    fn table_phys(&self) -> PhysAddr {
+        self.table_phys
+    }
+
+    /// Activate this page table
+    #[cfg(target_arch = "riscv64")]
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_activate"]
+    fn activate(&self) {
+        unimplemented!()
+    }
+
+    #[linkage = "weak"]
+    #[export_name = "hal_pt_unmap_cont"]
+    fn unmap_cont(&mut self, vaddr: VirtAddr, pages: usize) -> Result<()> {
+        for i in 0..pages {
+            self.unmap(vaddr + i * PAGE_SIZE)?;
+        }
+        Ok(())
+    }
+}
+```
+kernel-hal-unix implements PageTableTrait; its map calls mmap:
+```rust
+impl PageTableTrait for PageTable {
+    /// Map the page of `vaddr` to the frame of `paddr` with `flags`.
+    #[export_name = "hal_pt_map"]
+    fn map(&mut self, vaddr: VirtAddr, paddr: PhysAddr, flags: MMUFlags) -> Result<()> {
+        debug_assert!(page_aligned(vaddr));
+        debug_assert!(page_aligned(paddr));
+        let prot = flags.to_mmap_prot();
+        mmap(FRAME_FILE.as_raw_fd(), paddr, PAGE_SIZE, vaddr, prot);
+        Ok(())
+    }
+
+    /// Unmap the page of `vaddr`.
+    #[export_name = "hal_pt_unmap"]
+    fn unmap(&mut self, vaddr: VirtAddr) -> Result<()> {
+        self.unmap_cont(vaddr, 1)
+    }
+}
+```
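+
+Since the unix `map` simply mmaps the same frame-file offset at `vaddr`, a mapped virtual page becomes an alias of its backing "physical" page. A test-style sketch of that property (the addresses are illustrative assumptions, and it presumes the strong `hal_pt_new` implementation from kernel-hal-unix is linked):
+
+```rust
+#[test]
+fn map_aliases_physical_page() {
+    let mut pt = PageTable::new();
+    pt.map(0x8000_0000, 0x2000, MMUFlags::READ | MMUFlags::WRITE)
+        .unwrap();
+    pmem_write(0x2000, &[7u8; 4]);
+    // the write through "physical" memory is visible at the mapped virtual page
+    assert_eq!(unsafe { *(0x8000_0000 as *const u8) }, 7);
+}
+```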
 
 ## Implementing memory mapping
 
 > Use the HAL to implement the parts of VMAR left blank above, with unit tests verifying the memory mapping
 
+```rust
+impl VmMapping {
+    /// Map range and commit.
+    /// Commit pages to vmo, and map those to frames in page_table.
+    /// Temporarily used for development. A standard procedure for
+    /// vmo is: create_vmo, op_range(commit), map
+    fn map(self: &Arc<Self>) -> ZxResult {
+        self.vmo.commit_pages_with(&mut |commit| {
+            let inner = self.inner.lock();
+            let mut page_table = self.page_table.lock();
+            let page_num = inner.size / PAGE_SIZE;
+            let vmo_offset = inner.vmo_offset / PAGE_SIZE;
+            for i in 0..page_num {
+                let paddr = commit(vmo_offset + i, inner.flags[i])?;
+                // Establish the page-table mapping via PageTableTrait's hal_pt_map
+                page_table
+                    .map(inner.addr + i * PAGE_SIZE, paddr, inner.flags[i])
+                    .expect("failed to map");
+            }
+            Ok(())
+        })
+    }
+
+    fn unmap(&self) {
+        let inner = self.inner.lock();
+        let pages = inner.size / PAGE_SIZE;
+        // TODO inner.vmo_offset unused?
+        self.page_table
+            .lock()
+            .unmap_cont(inner.addr, pages)
+            .expect("failed to unmap")
+    }
+}
+```
\ No newline at end of file
diff --git a/docs/src/img/mmap.png b/docs/src/img/mmap.png
new file mode 100644
index 0000000..852e576
Binary files /dev/null and b/docs/src/img/mmap.png differ