diff --git a/kernel/src/arch/x86_64/gdt.rs b/kernel/src/arch/x86_64/gdt.rs index a445e9b..8d50d0c 100644 --- a/kernel/src/arch/x86_64/gdt.rs +++ b/kernel/src/arch/x86_64/gdt.rs @@ -3,6 +3,7 @@ use alloc::boxed::Box; use x86_64::{PrivilegeLevel, VirtAddr}; use x86_64::structures::gdt::*; use x86_64::structures::tss::TaskStateSegment; +use x86_64::registers::model_specific::Msr; use crate::consts::MAX_CPU_NUM; @@ -49,12 +50,10 @@ impl Cpu { // GDT self.gdt.add_entry(KCODE); - self.gdt.add_entry(UCODE); - // KDATA use segment 0 - // self.gdt.add_entry(KDATA); - self.gdt.add_entry(UDATA); + self.gdt.add_entry(KDATA); self.gdt.add_entry(UCODE32); self.gdt.add_entry(UDATA32); + self.gdt.add_entry(UCODE); self.gdt.add_entry(Descriptor::tss_segment(&self.tss)); self.gdt.load(); @@ -62,6 +61,10 @@ impl Cpu { set_cs(KCODE_SELECTOR); // load TSS load_tss(TSS_SELECTOR); + // for fast syscall: + // store address of TSS to kernel_gsbase + let mut kernel_gsbase = Msr::new(0xC0000102); + kernel_gsbase.write(&self.tss as *const _ as u64); } /// 设置从Ring3跳到Ring0时,自动切换栈的地址 @@ -81,14 +84,15 @@ const UCODE: Descriptor = Descriptor::UserSegment(0x0020F80000000000); // EXECU const KDATA: Descriptor = Descriptor::UserSegment(0x0000920000000000); // DATA_WRITABLE | USER_SEGMENT | PRESENT const UDATA: Descriptor = Descriptor::UserSegment(0x0000F20000000000); // DATA_WRITABLE | USER_SEGMENT | USER_MODE | PRESENT // Copied from xv6 -const UCODE32: Descriptor = Descriptor::UserSegment(0x00cffa00_0000ffff); -// EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT +const UCODE32: Descriptor = Descriptor::UserSegment(0x00cffa00_0000ffff); // EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT const UDATA32: Descriptor = Descriptor::UserSegment(0x00cff200_0000ffff); // EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT +// NOTICE: for fast syscall: +// STAR[47:32] = K_CS = K_SS - 8 +// STAR[63:48] = U_CS32 = U_SS32 - 8 = U_CS - 16 pub const KCODE_SELECTOR: SegmentSelector = SegmentSelector::new(1, 
PrivilegeLevel::Ring0); -pub const UCODE_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring3); -pub const KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(0, PrivilegeLevel::Ring0); -pub const UDATA_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring3); -pub const UCODE32_SELECTOR: SegmentSelector = SegmentSelector::new(4, PrivilegeLevel::Ring3); -pub const UDATA32_SELECTOR: SegmentSelector = SegmentSelector::new(5, PrivilegeLevel::Ring3); +pub const KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0); +pub const UCODE32_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring3); +pub const UDATA32_SELECTOR: SegmentSelector = SegmentSelector::new(4, PrivilegeLevel::Ring3); +pub const UCODE_SELECTOR: SegmentSelector = SegmentSelector::new(5, PrivilegeLevel::Ring3); pub const TSS_SELECTOR: SegmentSelector = SegmentSelector::new(6, PrivilegeLevel::Ring0); \ No newline at end of file diff --git a/kernel/src/arch/x86_64/idt.rs b/kernel/src/arch/x86_64/idt.rs index 59ecf87..2679dc7 100644 --- a/kernel/src/arch/x86_64/idt.rs +++ b/kernel/src/arch/x86_64/idt.rs @@ -19,7 +19,7 @@ lazy_static! 
{ // * 某些保留中断号不允许设置,会触发panic // 于是下面用了一些trick绕过了它们 - let ring3 = [SwitchToKernel, Syscall, Syscall32]; + let ring3 = [Syscall32]; let mut idt = InterruptDescriptorTable::new(); let entries = unsafe{ &mut *(&mut idt as *mut _ as *mut [Entry; 256]) }; diff --git a/kernel/src/arch/x86_64/interrupt/consts.rs b/kernel/src/arch/x86_64/interrupt/consts.rs index 449c56f..4ab7494 100644 --- a/kernel/src/arch/x86_64/interrupt/consts.rs +++ b/kernel/src/arch/x86_64/interrupt/consts.rs @@ -24,10 +24,7 @@ pub const VirtualizationException: u8 = 20; pub const SecurityException: u8 = 30; pub const IRQ0: u8 = 32; -pub const Syscall: u8 = 0x40; pub const Syscall32: u8 = 0x80; -pub const SwitchToUser: u8 = 120; -pub const SwitchToKernel: u8 = 121; // IRQ pub const Timer: u8 = 0;
diff --git a/kernel/src/arch/x86_64/interrupt/fast_syscall.rs b/kernel/src/arch/x86_64/interrupt/fast_syscall.rs
new file mode 100644
index 0000000..6536384
--- /dev/null
+++ b/kernel/src/arch/x86_64/interrupt/fast_syscall.rs
@@ -0,0 +1,48 @@
+//! Fast system calls through the `syscall` instruction.
+
+use x86_64::registers::model_specific::*;
+use super::super::gdt;
+use super::TrapFrame;
+
+/// Enables the `syscall` instruction on the current CPU.
+///
+/// Sets `SYSTEM_CALL_EXTENSIONS` in EFER, then programs the STAR, LSTAR and
+/// SFMASK MSRs so that `syscall` enters the kernel at `syscall_entry`.
+pub fn init() {
+    // NOTE(review): assumes the GDT uses the selector layout encoded in `STAR`
+    // — confirm this runs on every CPU that loads that GDT.
+    unsafe {
+        Efer::update(|flags| {
+            *flags |= EferFlags::SYSTEM_CALL_EXTENSIONS;
+        });
+
+        let mut star = Msr::new(0xC0000081);
+        let mut lstar = Msr::new(0xC0000082);
+        let mut sfmask = Msr::new(0xC0000084);
+
+        // flags to clear on syscall
+        // copy from Linux 5.0
+        // TF|DF|IF|IOPL|AC|NT
+        let rflags_mask = 0x47700;
+
+        star.write(STAR);
+        lstar.write(syscall_entry as u64);
+        sfmask.write(rflags_mask);
+    }
+}
+
+extern {
+    /// Assembly entry point for `syscall` (see `trap.asm`).
+    fn syscall_entry();
+}
+
+/// Value written to the STAR MSR.
+///
+/// Composed with explicit shifts rather than by transmuting a
+/// `#[repr(packed)]` struct, whose field order is unspecified without `repr(C)`.
+///
+/// - bits 31:0: EIP, ignored in 64 bit mode
+/// - bits 47:32: kernel CS
+/// - bits 63:48: 32-bit user CS
+const STAR: u64 = ((gdt::UCODE32_SELECTOR.0 as u64) << 48)
+    | ((gdt::KCODE_SELECTOR.0 as u64) << 32);
diff --git a/kernel/src/arch/x86_64/interrupt/handler.rs b/kernel/src/arch/x86_64/interrupt/handler.rs index 
ad95045..6b34073 100644 --- a/kernel/src/arch/x86_64/interrupt/handler.rs +++ b/kernel/src/arch/x86_64/interrupt/handler.rs @@ -102,9 +102,6 @@ pub extern fn rust_trap(tf: &mut TrapFrame) { }, } } - SwitchToKernel => to_kernel(tf), - SwitchToUser => to_user(tf), - Syscall => syscall(tf), Syscall32 => syscall32(tf), InvalidOpcode => invalid_opcode(tf), DivideError | GeneralProtectionFault => error(tf), @@ -167,22 +164,8 @@ fn ide() { trace!("\nInterupt: IDE"); } -fn to_user(tf: &mut TrapFrame) { - use crate::arch::gdt; - info!("\nInterupt: To User"); - tf.cs = gdt::UCODE_SELECTOR.0 as usize; - tf.ss = gdt::UDATA_SELECTOR.0 as usize; - tf.rflags |= 3 << 12; // 设置EFLAG的I/O特权位,使得在用户态可使用in/out指令 -} - -fn to_kernel(tf: &mut TrapFrame) { - use crate::arch::gdt; - info!("\nInterupt: To Kernel"); - tf.cs = gdt::KCODE_SELECTOR.0 as usize; - tf.ss = gdt::KDATA_SELECTOR.0 as usize; -} - -fn syscall(tf: &mut TrapFrame) { +#[no_mangle] +pub extern "C" fn syscall(tf: &mut TrapFrame) { trace!("\nInterupt: Syscall {:#x?}", tf.rax); let ret = crate::syscall::syscall(tf.rax, [tf.rdi, tf.rsi, tf.rdx, tf.r10, tf.r8, tf.r9], tf); tf.rax = ret as usize; @@ -199,8 +182,8 @@ fn invalid_opcode(tf: &mut TrapFrame) { let opcode = unsafe { (tf.rip as *mut u16).read() }; const SYSCALL_OPCODE: u16 = 0x05_0f; if opcode == SYSCALL_OPCODE { + tf.rip += 2; // must before syscall syscall(tf); - tf.rip += 2; } else { crate::trap::error(tf); }
@@ -211,10 +194,13 @@ fn error(tf: &TrapFrame) {
 }
 
 #[no_mangle]
-pub extern fn set_return_rsp(tf: &TrapFrame) {
+/// Points the current CPU's ring 0 stack pointer just past `tf`.
+///
+/// # Safety
+///
+/// `tf` must point to a valid `TrapFrame`: the address one `TrapFrame` past
+/// it (`tf.add(1)`) is passed to `Cpu::set_ring0_rsp`.
+pub unsafe extern fn set_return_rsp(tf: *const TrapFrame) {
     use crate::arch::gdt::Cpu;
-    use core::mem::size_of;
-    if tf.cs & 0x3 == 3 {
-        Cpu::current().set_ring0_rsp(tf as *const _ as usize + size_of::<TrapFrame>());
-    }
+    Cpu::current().set_ring0_rsp(tf.add(1) as usize);
 }
diff --git a/kernel/src/arch/x86_64/interrupt/mod.rs b/kernel/src/arch/x86_64/interrupt/mod.rs index db14851..e87657a 100644 --- a/kernel/src/arch/x86_64/interrupt/mod.rs +++ 
b/kernel/src/arch/x86_64/interrupt/mod.rs @@ -1,6 +1,7 @@ pub mod consts; mod handler; mod trapframe; +pub mod fast_syscall; pub use self::trapframe::*; pub use self::handler::*; diff --git a/kernel/src/arch/x86_64/interrupt/trap.asm b/kernel/src/arch/x86_64/interrupt/trap.asm index 2dcca39..d20528d 100644 --- a/kernel/src/arch/x86_64/interrupt/trap.asm +++ b/kernel/src/arch/x86_64/interrupt/trap.asm @@ -37,11 +37,11 @@ __alltraps: .byte 0x0f .byte 0xae .byte 0x00 - mov rbx, rsp - sub rbx, rax + mov rcx, rsp + sub rcx, rax # push fp state offset sub rsp, 16 - push rbx + push rcx mov rdi, rsp call rust_trap @@ -53,12 +53,12 @@ trap_ret: call set_return_rsp # pop fp state offset - pop rbx - cmp rbx, 16 # only 0-15 are valid + pop rcx + cmp rcx, 16 # only 0-15 are valid jge skip_fxrstor mov rax, rsp add rax, 16 - sub rax, rbx + sub rax, rcx # fxrstor (rax) .byte 0x0f .byte 0xae @@ -93,4 +93,136 @@ skip_fxrstor: # pop trap_num, error_code add rsp, 16 - iretq \ No newline at end of file
+    iretq
+
+.global syscall_entry
+syscall_entry:
+    # Reached via the `syscall` instruction, which has already:
+    #   - loaded cs
+    #   - stored rflags -> r11
+    #   - masked rflags
+    #   - stored rip -> rcx
+    #   - loaded rip
+
+    # swap in kernel gs (gdt.rs writes the TSS address to kernel_gsbase)
+    swapgs
+    # store user rsp -> scratch at TSS.sp1
+    mov gs:[12], rsp
+    # load kernel rsp <- TSS.sp0
+    mov rsp, gs:[4]
+
+    push 0x23               # ss (WARN: match gdt) = UDATA32_SELECTOR: index 4, RPL 3
+    push gs:[12]            # rsp (user rsp saved above)
+    push r11                # rflags
+    push 0x2b               # cs (WARN: match gdt) = UCODE_SELECTOR: index 5, RPL 3
+    push rcx                # rip
+    push 0                  # error_code (dummy)
+    push 0                  # trap_num (dummy)
+
+    # swap out kernel gs
+    swapgs
+
+    # enable interrupt
+    # sti
+
+    push rax
+    push rcx
+    push rdx
+    push rdi
+    push rsi
+    push r8
+    push r9
+    push r10
+    push r11
+
+    push rbx
+    push rbp
+    push r12
+    push r13
+    push r14
+    push r15
+
+    # push fs.base
+    xor rax, rax
+    mov ecx, 0xC0000100
+    rdmsr                   # msr[ecx] => edx:eax
+    shl rdx, 32
+    or rdx, rax
+    push rdx
+
+    # save fp registers
+    # reserve 512 bytes, then round the save address down to a 16 byte boundary
+    sub rsp, 512
+    mov rax, rsp
+    and rax, 0xFFFFFFFFFFFFFFF0
+    # fxsave (rax)
+    .byte 0x0f
+    .byte 0xae
+    .byte 0x00
+    mov rcx, rsp
+    sub rcx, rax
+    # push fp state offset (rsp - aligned save address)
+    sub rsp, 16
+    push rcx
+
+    mov rdi, rsp            # first argument of the Rust `syscall` handler
+    call syscall
+
+syscall_return:
+
+    # disable interrupt
+    cli
+
+    mov rdi, rsp
+    call set_return_rsp
+
+    # pop fp state offset
+    pop rcx
+    cmp rcx, 16             # only 0-15 are valid
+    jge skip_fxrstor1
+    mov rax, rsp
+    add rax, 16
+    sub rax, rcx
+    # fxrstor (rax)
+    .byte 0x0f
+    .byte 0xae
+    .byte 0x08
+skip_fxrstor1:
+    add rsp, 16+512
+
+    # pop fs.base
+    pop rax
+    mov rdx, rax
+    shr rdx, 32
+    mov ecx, 0xC0000100
+    wrmsr                   # msr[ecx] <= edx:eax
+
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop rbp
+    pop rbx
+
+    pop r11
+    pop r10
+    pop r9
+    pop r8
+    pop rsi
+    pop rdi
+    pop rdx
+    pop rcx
+    pop rax
+
+    add rsp, 2*8            # trap_num, error_code
+    pop rcx                 # rip
+    add rsp, 1*8            # cs
+    pop r11                 # rflags
+    pop rsp                 # user rsp (the ss slot is left on the kernel stack)
+
+    sysretq
+
+    # The `sysretq` instruction does:
+    #   - load cs, ss
+    #   - load rflags <- r11
+    #   - load rip <- rcx
\ No newline at end of file
diff --git a/kernel/src/arch/x86_64/interrupt/trapframe.rs b/kernel/src/arch/x86_64/interrupt/trapframe.rs index 31d9d52..1863286 100644 --- a/kernel/src/arch/x86_64/interrupt/trapframe.rs +++ b/kernel/src/arch/x86_64/interrupt/trapframe.rs @@ -79,7 +79,7 @@ impl TrapFrame { let mut tf = TrapFrame::default(); tf.cs = if is32 { gdt::UCODE32_SELECTOR.0 } else { gdt::UCODE_SELECTOR.0 } as usize; tf.rip = entry_addr; - tf.ss = if is32 { gdt::UDATA32_SELECTOR.0 } else { gdt::UDATA_SELECTOR.0 } as usize; + tf.ss = gdt::UDATA32_SELECTOR.0 as usize; tf.rsp = rsp; tf.rflags = 0x282; tf.fpstate_offset = 16; // skip restoring for first time @@ -198,8 +198,6 @@ impl Context { tf: { let mut tf = tf.clone(); tf.rax = 0; - // skip syscall inst; - tf.rip += 2; tf }, }.push_at(kstack_top) @@ -212,8 +210,6 @@ impl Context { tf.rsp = ustack_top; tf.fsbase = tls; tf.rax = 0; - // skip syscall inst; - tf.rip += 2; tf }, }.push_at(kstack_top) diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index 2a9aee6..bc6f697 100644 
--- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -27,11 +27,11 @@ pub extern "C" fn _start(boot_info: &'static BootInfo) -> ! { // First init log mod, so that we can print log info. crate::logging::init(); - info!("Hello world!"); info!("{:#?}", boot_info); // Init trap handling. idt::init(); + interrupt::fast_syscall::init(); // Init physical memory management and heap. memory::init(boot_info); @@ -57,5 +57,6 @@ fn other_start() -> ! { idt::init(); gdt::init(); cpu::init(); + interrupt::fast_syscall::init(); crate::kmain(); } \ No newline at end of file