support fast syscall

master
WangRunji 6 years ago
parent 98b3b12c96
commit 9269a9856d

@ -3,6 +3,7 @@ use alloc::boxed::Box;
use x86_64::{PrivilegeLevel, VirtAddr};
use x86_64::structures::gdt::*;
use x86_64::structures::tss::TaskStateSegment;
use x86_64::registers::model_specific::Msr;
use crate::consts::MAX_CPU_NUM;
@ -49,12 +50,10 @@ impl Cpu {
// GDT
self.gdt.add_entry(KCODE);
self.gdt.add_entry(UCODE);
// KDATA use segment 0
// self.gdt.add_entry(KDATA);
self.gdt.add_entry(UDATA);
self.gdt.add_entry(KDATA);
self.gdt.add_entry(UCODE32);
self.gdt.add_entry(UDATA32);
self.gdt.add_entry(UCODE);
self.gdt.add_entry(Descriptor::tss_segment(&self.tss));
self.gdt.load();
@ -62,6 +61,10 @@ impl Cpu {
set_cs(KCODE_SELECTOR);
// load TSS
load_tss(TSS_SELECTOR);
// for fast syscall:
// store address of TSS to kernel_gsbase
let mut kernel_gsbase = Msr::new(0xC0000102);
kernel_gsbase.write(&self.tss as *const _ as u64);
}
/// Set the stack address that the CPU switches to automatically on a Ring 3 -> Ring 0 transition
@ -81,14 +84,15 @@ const UCODE: Descriptor = Descriptor::UserSegment(0x0020F80000000000); // EXECU
const KDATA: Descriptor = Descriptor::UserSegment(0x0000920000000000); // DATA_WRITABLE | USER_SEGMENT | PRESENT
const UDATA: Descriptor = Descriptor::UserSegment(0x0000F20000000000); // DATA_WRITABLE | USER_SEGMENT | USER_MODE | PRESENT
// Copied from xv6
const UCODE32: Descriptor = Descriptor::UserSegment(0x00cffa00_0000ffff);
// EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT
const UCODE32: Descriptor = Descriptor::UserSegment(0x00cffa00_0000ffff); // EXECUTABLE | USER_SEGMENT | USER_MODE | PRESENT
const UDATA32: Descriptor = Descriptor::UserSegment(0x00cff200_0000ffff); // DATA_WRITABLE | USER_SEGMENT | USER_MODE | PRESENT
// NOTICE: for fast syscall:
// STAR[47:32] = K_CS = K_SS - 8
// STAR[63:48] = U_CS32 = U_SS32 - 8 = U_CS - 16
pub const KCODE_SELECTOR: SegmentSelector = SegmentSelector::new(1, PrivilegeLevel::Ring0);
pub const UCODE_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring3);
pub const KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(0, PrivilegeLevel::Ring0);
pub const UDATA_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring3);
pub const UCODE32_SELECTOR: SegmentSelector = SegmentSelector::new(4, PrivilegeLevel::Ring3);
pub const UDATA32_SELECTOR: SegmentSelector = SegmentSelector::new(5, PrivilegeLevel::Ring3);
pub const KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0);
pub const UCODE32_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring3);
pub const UDATA32_SELECTOR: SegmentSelector = SegmentSelector::new(4, PrivilegeLevel::Ring3);
pub const UCODE_SELECTOR: SegmentSelector = SegmentSelector::new(5, PrivilegeLevel::Ring3);
pub const TSS_SELECTOR: SegmentSelector = SegmentSelector::new(6, PrivilegeLevel::Ring0);

@ -19,7 +19,7 @@ lazy_static! {
// * Some reserved interrupt numbers must not be set; doing so triggers a panic,
// so the code below uses a trick to bypass them
let ring3 = [SwitchToKernel, Syscall, Syscall32];
let ring3 = [Syscall32];
let mut idt = InterruptDescriptorTable::new();
let entries = unsafe{ &mut *(&mut idt as *mut _ as *mut [Entry<HandlerFunc>; 256]) };

@ -24,10 +24,7 @@ pub const VirtualizationException: u8 = 20;
pub const SecurityException: u8 = 30;
pub const IRQ0: u8 = 32;
pub const Syscall: u8 = 0x40;
pub const Syscall32: u8 = 0x80;
pub const SwitchToUser: u8 = 120;
pub const SwitchToKernel: u8 = 121;
// IRQ
pub const Timer: u8 = 0;

@ -0,0 +1,44 @@
//! Fast system call support built on the `syscall` instruction.
use x86_64::registers::model_specific::*;
use core::mem::transmute;
use super::super::gdt;
use super::TrapFrame;
/// Enables the fast system call path (`syscall` instruction) on the current CPU.
///
/// Sets the `SYSTEM_CALL_EXTENSIONS` flag in EFER, then programs three MSRs:
/// STAR with the selector values from `STAR`, LSTAR with the address of
/// `syscall_entry`, and SFMASK with the RFLAGS bits to clear on entry.
pub fn init() {
    // MSR numbers of the registers programmed below.
    const MSR_STAR: u32 = 0xC0000081;
    const MSR_LSTAR: u32 = 0xC0000082;
    const MSR_SFMASK: u32 = 0xC0000084;
    // RFLAGS bits cleared on `syscall` (same set as Linux 5.0):
    // TF|DF|IF|IOPL|AC|NT
    const RFLAGS_CLEARED_ON_SYSCALL: u64 = 0x47700;

    unsafe {
        Efer::update(|efer| *efer |= EferFlags::SYSTEM_CALL_EXTENSIONS);
        Msr::new(MSR_STAR).write(transmute(STAR));
        Msr::new(MSR_LSTAR).write(syscall_entry as u64);
        Msr::new(MSR_SFMASK).write(RFLAGS_CLEARED_ON_SYSCALL);
    }
}
// Entry point for the `syscall` instruction; `init` writes its address to the LSTAR MSR.
// NOTE(review): expected to be provided by the trap assembly (`.global syscall_entry`) — confirm at link time.
extern {
fn syscall_entry();
}
/// In-memory image of the STAR MSR; `init` converts it to the raw 64-bit
/// register value with `transmute`.
///
/// `repr(C)` is required in addition to `packed`: `packed` on its own only
/// removes padding and gives no guarantee about field order, while the
/// transmute relies on the fields being laid out exactly as declared.
/// On little-endian x86_64 the fields map to:
/// `eip` -> bits 31:0, `kernel_cs` -> bits 47:32, `user_cs` -> bits 63:48.
#[repr(C, packed)]
struct StarMsr {
    /// Bits 31:0 of STAR.
    eip: u32,
    /// Bits 47:32 of STAR: selector base used when entering the kernel.
    kernel_cs: u16,
    /// Bits 63:48 of STAR: selector base used when returning to user mode.
    user_cs: u16,
}
/// Value written to the STAR MSR by `init`.
/// The selectors must satisfy the GDT ordering required by fast syscall:
/// K_CS = K_SS - 8 and U_CS32 = U_SS32 - 8 = U_CS - 16.
const STAR: StarMsr = StarMsr {
eip: 0, // ignored in 64 bit mode
kernel_cs: gdt::KCODE_SELECTOR.0, // STAR[47:32]
user_cs: gdt::UCODE32_SELECTOR.0, // STAR[63:48]
};

@ -102,9 +102,6 @@ pub extern fn rust_trap(tf: &mut TrapFrame) {
},
}
}
SwitchToKernel => to_kernel(tf),
SwitchToUser => to_user(tf),
Syscall => syscall(tf),
Syscall32 => syscall32(tf),
InvalidOpcode => invalid_opcode(tf),
DivideError | GeneralProtectionFault => error(tf),
@ -167,22 +164,8 @@ fn ide() {
trace!("\nInterupt: IDE");
}
/// Handler for the `SwitchToUser` trap: rewrites the saved trap frame so that
/// its code/stack segment selectors are the user-mode ones.
fn to_user(tf: &mut TrapFrame) {
use crate::arch::gdt;
info!("\nInterupt: To User");
tf.cs = gdt::UCODE_SELECTOR.0 as usize;
tf.ss = gdt::UDATA_SELECTOR.0 as usize;
tf.rflags |= 3 << 12; // set the I/O privilege level bits of EFLAGS so that in/out instructions can be used in user mode
}
/// Handler for the `SwitchToKernel` trap: rewrites the saved trap frame so that
/// its code/stack segment selectors are the kernel ones.
fn to_kernel(tf: &mut TrapFrame) {
use crate::arch::gdt;
info!("\nInterupt: To Kernel");
tf.cs = gdt::KCODE_SELECTOR.0 as usize;
tf.ss = gdt::KDATA_SELECTOR.0 as usize;
}
fn syscall(tf: &mut TrapFrame) {
#[no_mangle]
pub extern "C" fn syscall(tf: &mut TrapFrame) {
trace!("\nInterupt: Syscall {:#x?}", tf.rax);
let ret = crate::syscall::syscall(tf.rax, [tf.rdi, tf.rsi, tf.rdx, tf.r10, tf.r8, tf.r9], tf);
tf.rax = ret as usize;
@ -199,8 +182,8 @@ fn invalid_opcode(tf: &mut TrapFrame) {
let opcode = unsafe { (tf.rip as *mut u16).read() };
const SYSCALL_OPCODE: u16 = 0x05_0f;
if opcode == SYSCALL_OPCODE {
tf.rip += 2; // must before syscall
syscall(tf);
tf.rip += 2;
} else {
crate::trap::error(tf);
}
@ -211,10 +194,7 @@ fn error(tf: &TrapFrame) {
}
#[no_mangle]
pub extern fn set_return_rsp(tf: &TrapFrame) {
pub unsafe extern fn set_return_rsp(tf: *const TrapFrame) {
use crate::arch::gdt::Cpu;
use core::mem::size_of;
if tf.cs & 0x3 == 3 {
Cpu::current().set_ring0_rsp(tf as *const _ as usize + size_of::<TrapFrame>());
}
Cpu::current().set_ring0_rsp(tf.add(1) as usize);
}

@ -1,6 +1,7 @@
pub mod consts;
mod handler;
mod trapframe;
pub mod fast_syscall;
pub use self::trapframe::*;
pub use self::handler::*;

@ -37,11 +37,11 @@ __alltraps:
.byte 0x0f
.byte 0xae
.byte 0x00
mov rbx, rsp
sub rbx, rax
mov rcx, rsp
sub rcx, rax
# push fp state offset
sub rsp, 16
push rbx
push rcx
mov rdi, rsp
call rust_trap
@ -53,12 +53,12 @@ trap_ret:
call set_return_rsp
# pop fp state offset
pop rbx
cmp rbx, 16 # only 0-15 are valid
pop rcx
cmp rcx, 16 # only 0-15 are valid
jge skip_fxrstor
mov rax, rsp
add rax, 16
sub rax, rbx
sub rax, rcx
# fxrstor (rax)
.byte 0x0f
.byte 0xae
@ -93,4 +93,136 @@ skip_fxrstor:
# pop trap_num, error_code
add rsp, 16
iretq
iretq
# Entry point for the `syscall` instruction (its address is written to LSTAR).
# Builds a trap frame on the kernel stack, calls the Rust `syscall` handler,
# then restores the frame and returns to user mode with `sysretq`.
.global syscall_entry
syscall_entry:
# Before reaching this label, the syscall instruction has already:
# - loaded cs
# - stored rflags -> r11
# - masked rflags
# - stored rip -> rcx
# - loaded rip
# swap in kernel gs (kernel_gsbase holds the address of the TSS)
swapgs
# store user rsp -> scratch at TSS.sp1
mov gs:[12], rsp
# load kernel rsp <- TSS.sp0
mov rsp, gs:[4]
# build the same frame tail that an interrupt would push: ss, rsp, rflags, cs, rip
push 0x23 # ss (WARN: must match gdt; 0x23 = GDT index 4, RPL 3)
push gs:[12] # rsp (user rsp saved above)
push r11 # rflags (saved in r11 by syscall)
push 0x2b # cs (WARN: must match gdt; 0x2b = GDT index 5, RPL 3)
push rcx # rip (saved in rcx by syscall)
push 0 # error_code (dummy)
push 0 # trap_num (dummy)
# swap out kernel gs
swapgs
# enable interrupt (currently left disabled)
# sti
# save general purpose registers
push rax
push rcx
push rdx
push rdi
push rsi
push r8
push r9
push r10
push r11
push rbx
push rbp
push r12
push r13
push r14
push r15
# push fs.base (read from MSR 0xC0000100)
xor rax, rax
mov ecx, 0xC0000100
rdmsr # msr[ecx] => edx:eax
shl rdx, 32
or rdx, rax
push rdx
# save fp registers
# reserve 512 bytes and align the save address down to a 16 byte boundary
sub rsp, 512
mov rax, rsp
and rax, 0xFFFFFFFFFFFFFFF0
# fxsave (rax)
.byte 0x0f
.byte 0xae
.byte 0x00
# rcx = distance between rsp and the aligned save address
mov rcx, rsp
sub rcx, rax
# push fp state offset
sub rsp, 16
push rcx
# pass the trap frame pointer as the first argument
mov rdi, rsp
call syscall
syscall_return:
# disable interrupt
cli
# pass the trap frame pointer as the first argument
mov rdi, rsp
call set_return_rsp
# pop fp state offset
pop rcx
cmp rcx, 16 # only 0-15 are valid; anything else skips the restore
jge skip_fxrstor1
# recompute the aligned save address from the stored offset
mov rax, rsp
add rax, 16
sub rax, rcx
# fxrstor (rax)
.byte 0x0f
.byte 0xae
.byte 0x08
skip_fxrstor1:
# drop the padding and the 512 byte fp save area
add rsp, 16+512
# pop fs.base (written back to MSR 0xC0000100)
pop rax
mov rdx, rax
shr rdx, 32
mov ecx, 0xC0000100
wrmsr # msr[ecx] <= edx:eax
# restore general purpose registers (reverse order of the pushes above)
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop r11
pop r10
pop r9
pop r8
pop rsi
pop rdi
pop rdx
pop rcx
pop rax
add rsp, 2*8 # skip trap_num, error_code
pop rcx # rip
add rsp, 1*8 # skip cs
pop r11 # rflags
pop rsp # user rsp (the saved ss is not popped)
sysretq
# The sysretq instruction then:
# - loads cs, ss
# - loads rflags <- r11
# - loads rip <- rcx

@ -79,7 +79,7 @@ impl TrapFrame {
let mut tf = TrapFrame::default();
tf.cs = if is32 { gdt::UCODE32_SELECTOR.0 } else { gdt::UCODE_SELECTOR.0 } as usize;
tf.rip = entry_addr;
tf.ss = if is32 { gdt::UDATA32_SELECTOR.0 } else { gdt::UDATA_SELECTOR.0 } as usize;
tf.ss = gdt::UDATA32_SELECTOR.0 as usize;
tf.rsp = rsp;
tf.rflags = 0x282;
tf.fpstate_offset = 16; // skip restoring for first time
@ -198,8 +198,6 @@ impl Context {
tf: {
let mut tf = tf.clone();
tf.rax = 0;
// skip syscall inst;
tf.rip += 2;
tf
},
}.push_at(kstack_top)
@ -212,8 +210,6 @@ impl Context {
tf.rsp = ustack_top;
tf.fsbase = tls;
tf.rax = 0;
// skip syscall inst;
tf.rip += 2;
tf
},
}.push_at(kstack_top)

@ -27,11 +27,11 @@ pub extern "C" fn _start(boot_info: &'static BootInfo) -> ! {
// First init log mod, so that we can print log info.
crate::logging::init();
info!("Hello world!");
info!("{:#?}", boot_info);
// Init trap handling.
idt::init();
interrupt::fast_syscall::init();
// Init physical memory management and heap.
memory::init(boot_info);
@ -57,5 +57,6 @@ fn other_start() -> ! {
idt::init();
gdt::init();
cpu::init();
interrupt::fast_syscall::init();
crate::kmain();
}
Loading…
Cancel
Save