You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

807 lines
20 KiB

2 years ago
/*
* linux/mm/memory.c
*
* (C) 1991 Linus Torvalds
*/
/*
* demand-loading started 01.12.91 - seems it is high on the list of
* things wanted, and it should be easy to implement. - Linus
*/
/*
* Ok, demand-loading was easy, shared pages a little bit tricker. Shared
* pages started 02.12.91, seems to work. - Linus.
*
* Tested sharing by executing about 30 /bin/sh: under the old kernel it
* would have taken more than the 6M I have free, but it worked well as
* far as I could see.
*
* Also corrected some "invalidate()"s - I wasn't doing enough of them.
*/
#include <stdarg.h>
2 years ago
#include <signal.h>
#include <asm/system.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <new.h>
#include <asm/segment.h>
2 years ago
volatile void do_exit(long code);
static inline volatile void oom(void)
{
printk("out of memory\n\r");
do_exit(SIGSEGV);
}
#define invalidate() \
__asm__("movl %%eax,%%cr3"::"a" (0))
/* these are not to be changed without changing head.s etc */
#define LOW_MEM 0x100000
#define PAGING_MEMORY (15*1024*1024)
#define PAGING_PAGES (PAGING_MEMORY>>12)
#define MAP_NR(addr) (((addr)-LOW_MEM)>>12)
#define USED 100
#define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \
current->start_code + current->end_code)
static long HIGH_MEMORY = 0;
#define copy_page(from,to) \
__asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024))
static unsigned char mem_map [ PAGING_PAGES ] = {0,};
/*
* Get physical address of first (actually last :-) free page, and mark it
* used. If no free pages left, return 0.
*/
unsigned long get_free_page(void)
{
register unsigned long __res asm("ax");
__asm__("std ; repne ; scasb\n\t"
"jne 1f\n\t"
"movb $1,1(%%edi)\n\t"
"sall $12,%%ecx\n\t"
"addl %2,%%ecx\n\t"
"movl %%ecx,%%edx\n\t"
"movl $1024,%%ecx\n\t"
"leal 4092(%%edx),%%edi\n\t"
"rep ; stosl\n\t"
"movl %%edx,%%eax\n\t"
"1:"
"cld\n\t" /* by wyj */
:"=a" (__res)
:"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES),
"D" (mem_map+PAGING_PAGES-1)
);
return __res;
}
/*
* Free a page of memory at physical address 'addr'. Used by
* 'free_page_tables()'
*/
void free_page(unsigned long addr)
{
if (addr < LOW_MEM) return;
if (addr >= HIGH_MEMORY)
panic("trying to free nonexistent page");
addr -= LOW_MEM;
addr >>= 12;
if (mem_map[addr]--) return;
mem_map[addr]=0;
panic("trying to free free page");
}
/*
* This function frees a continuos block of page tables, as needed
* by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks.
*/
int free_page_tables(unsigned long from,unsigned long size)
{
unsigned long *pg_table;
unsigned long * dir, nr;
if (from & 0x3fffff)
panic("free_page_tables called with wrong alignment");
if (!from)
panic("Trying to free up swapper memory space");
size = (size + 0x3fffff) >> 22;
dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
for ( ; size-->0 ; dir++) {
if (!(1 & *dir))
continue;
pg_table = (unsigned long *) (0xfffff000 & *dir);
for (nr=0 ; nr<1024 ; nr++) {
if (1 & *pg_table)
free_page(0xfffff000 & *pg_table);
*pg_table = 0;
pg_table++;
}
free_page(0xfffff000 & *dir);
*dir = 0;
}
invalidate();
return 0;
}
/*
* Well, here is one of the most complicated functions in mm. It
* copies a range of linerar addresses by copying only the pages.
* Let's hope this is bug-free, 'cause this one I don't want to debug :-)
*
* Note! We don't copy just any chunks of memory - addresses have to
* be divisible by 4Mb (one page-directory entry), as this makes the
* function easier. It's used only by fork anyway.
*
* NOTE 2!! When from==0 we are copying kernel space for the first
* fork(). Then we DONT want to copy a full page-directory entry, as
* that would lead to some serious memory waste - we just copy the
* first 160 pages - 640kB. Even that is more than we need, but it
* doesn't take any more memory - we don't copy-on-write in the low
* 1 Mb-range, so the pages can be shared with the kernel. Thus the
* special case for nr=xxxx.
*/
int copy_page_tables(unsigned long from,unsigned long to,long size)
{
unsigned long * from_page_table;
unsigned long * to_page_table;
unsigned long this_page;
unsigned long * from_dir, * to_dir;
unsigned long nr;
if ((from&0x3fffff) || (to&0x3fffff))
panic("copy_page_tables called with wrong alignment");
from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
to_dir = (unsigned long *) ((to>>20) & 0xffc);
size = ((unsigned) (size+0x3fffff)) >> 22;
for( ; size-->0 ; from_dir++,to_dir++) {
if (1 & *to_dir)
panic("copy_page_tables: already exist");
if (!(1 & *from_dir))
continue;
from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
if (!(to_page_table = (unsigned long *) get_free_page()))
return -1; /* Out of memory, see freeing */
*to_dir = ((unsigned long) to_page_table) | 7;
nr = (from==0)?0xA0:1024;
for ( ; nr-- > 0 ; from_page_table++,to_page_table++) {
this_page = *from_page_table;
if (!(1 & this_page))
continue;
this_page &= ~2;
*to_page_table = this_page;
if (this_page > LOW_MEM) {
*from_page_table = this_page;
this_page -= LOW_MEM;
this_page >>= 12;
mem_map[this_page]++;
}
}
}
invalidate();
return 0;
}
/*
* This function puts a page in memory at the wanted address.
* It returns the physical address of the page gotten, 0 if
* out of memory (either when trying to access page-table or
* page.)
*/
unsigned long put_page(unsigned long page,unsigned long address)
{
unsigned long tmp, *page_table;
/* NOTE !!! This uses the fact that _pg_dir=0 */
if (page < LOW_MEM || page >= HIGH_MEMORY)
printk("Trying to put page %p at %p\n",page,address);
if (mem_map[(page-LOW_MEM)>>12] != 1)
printk("mem_map disagrees with %p at %p\n",page,address);
page_table = (unsigned long *) ((address>>20) & 0xffc);
if ((*page_table)&1)
page_table = (unsigned long *) (0xfffff000 & *page_table);
else {
if (!(tmp=get_free_page()))
return 0;
*page_table = tmp|7;
page_table = (unsigned long *) tmp;
}
page_table[(address>>12) & 0x3ff] = page | 7;
/* no need for invalidate */
return page;
}
void un_wp_page(unsigned long * table_entry)
{
unsigned long old_page,new_page;
old_page = 0xfffff000 & *table_entry;
if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
*table_entry |= 2;
invalidate();
return;
}
if (!(new_page=get_free_page()))
oom();
if (old_page >= LOW_MEM)
mem_map[MAP_NR(old_page)]--;
*table_entry = new_page | 7;
invalidate();
copy_page(old_page,new_page);
}
/*
* This routine handles present pages, when users try to write
* to a shared page. It is done by copying the page to a new address
* and decrementing the shared-page counter for the old page.
*
* If it's in code space we exit with a segment error.
*/
void do_wp_page(unsigned long error_code,unsigned long address)
{
#if 0
/* we cannot do this yet: the estdio library writes to code space */
/* stupid, stupid. I really want the libc.a from GNU */
if (CODE_SPACE(address))
do_exit(SIGSEGV);
#endif
un_wp_page((unsigned long *)
(((address>>10) & 0xffc) + (0xfffff000 &
*((unsigned long *) ((address>>20) &0xffc)))));
}
void write_verify(unsigned long address)
{
unsigned long page;
if (!( (page = *((unsigned long *) ((address>>20) & 0xffc)) )&1))
return;
page &= 0xfffff000;
page += ((address>>10) & 0xffc);
if ((3 & *(unsigned long *) page) == 1) /* non-writeable, present */
un_wp_page((unsigned long *) page);
return;
}
void get_empty_page(unsigned long address)
{
unsigned long tmp;
if (!(tmp=get_free_page()) || !put_page(tmp,address)) {
free_page(tmp); /* 0 is ok - ignored */
oom();
}
}
/*
* try_to_share() checks the page at address "address" in the task "p",
* to see if it exists, and if it is clean. If so, share it with the current
* task.
*
* NOTE! This assumes we have checked that p != current, and that they
* share the same executable.
*/
static int try_to_share(unsigned long address, struct task_struct * p)
{
unsigned long from;
unsigned long to;
unsigned long from_page;
unsigned long to_page;
unsigned long phys_addr;
from_page = to_page = ((address>>20) & 0xffc);
from_page += ((p->start_code>>20) & 0xffc);
to_page += ((current->start_code>>20) & 0xffc);
/* is there a page-directory at from? */
from = *(unsigned long *) from_page;
if (!(from & 1))
return 0;
from &= 0xfffff000;
from_page = from + ((address>>10) & 0xffc);
phys_addr = *(unsigned long *) from_page;
/* is the page clean and present? */
if ((phys_addr & 0x41) != 0x01)
return 0;
phys_addr &= 0xfffff000;
if (phys_addr >= HIGH_MEMORY || phys_addr < LOW_MEM)
return 0;
to = *(unsigned long *) to_page;
if (!(to & 1))
if (to = get_free_page())
*(unsigned long *) to_page = to | 7;
else
oom();
to &= 0xfffff000;
to_page = to + ((address>>10) & 0xffc);
if (1 & *(unsigned long *) to_page)
panic("try_to_share: to_page already exists");
/* share them: write-protect */
*(unsigned long *) from_page &= ~2;
*(unsigned long *) to_page = *(unsigned long *) from_page;
invalidate();
phys_addr -= LOW_MEM;
phys_addr >>= 12;
mem_map[phys_addr]++;
return 1;
}
/*
* share_page() tries to find a process that could share a page with
* the current one. Address is the address of the wanted page relative
* to the current data space.
*
* We first check if it is at all feasible by checking executable->i_count.
* It should be >1 if there are other tasks sharing this inode.
*/
static int share_page(unsigned long address)
{
struct task_struct ** p;
if (!current->executable)
return 0;
if (current->executable->i_count < 2)
return 0;
for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
if (!*p)
continue;
if (current == *p)
continue;
if ((*p)->executable != current->executable)
continue;
if (try_to_share(address,*p))
return 1;
}
return 0;
}
void do_no_page(unsigned long error_code,unsigned long address)
{
int nr[4];
unsigned long tmp;
unsigned long page;
int block,i;
/* if (current->pid > 5)
printk(" --do_no_page: address=%x, pid=%d\n", address, current->pid); */
2 years ago
address &= 0xfffff000;
tmp = address - current->start_code;
if (!current->executable || tmp >= current->end_data) {
get_empty_page(address);
return;
}
if (share_page(tmp))
return;
if (!(page = get_free_page()))
oom();
/* remember that 1 block is used for header */
block = 1 + tmp/BLOCK_SIZE;
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(current->executable,block);
bread_page(page,current->executable->i_dev,nr);
i = tmp + 4096 - current->end_data;
tmp = page + 4096;
while (i-- > 0) {
tmp--;
*(char *)tmp = 0;
}
if (put_page(page,address))
return;
free_page(page);
oom();
}
void mem_init(long start_mem, long end_mem)
{
int i;
HIGH_MEMORY = end_mem;
for (i=0 ; i<PAGING_PAGES ; i++)
mem_map[i] = USED;
i = MAP_NR(start_mem);
end_mem -= start_mem;
end_mem >>= 12;
while (end_mem-->0)
mem_map[i++]=0;
}
int do_execve2(unsigned long * eip,long tmp,char * filename,
char ** argv, char ** envp)
{
int ans=0;
if((ans=do_execve(eip,tmp,filename,argv,envp)))
return ans;
int nr[4];
unsigned long temp;
unsigned long page;
int block,i;
unsigned long address=current->start_code;
//this part comes from do_no_page as achievement of immediate loading of cs and ds
while(address<=current->brk+current->start_code)
{
address &= 0xfffff000;
temp = address - current->start_code;
if (share_page(temp))
return -1;
if (!(page = get_free_page()))
oom();
block = 1 + temp/BLOCK_SIZE;
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(current->executable,block);
bread_page(page,current->executable->i_dev,nr);
if (!put_page(page,address))
{
free_page(page);
oom();
}
address+=PAGE_SIZE;
}
return 0;
}
void calc_mem(void)
{
int i,j,k,free=0;
long * pg_tbl;
for(i=0 ; i<PAGING_PAGES ; i++)
if (!mem_map[i]) free++;
printk("%d pages free (of %d)\n\r",free,PAGING_PAGES);
for(i=2 ; i<1024 ; i++) {
if (1&pg_dir[i]) {
pg_tbl=(long *) (0xfffff000 & pg_dir[i]);
for(j=k=0 ; j<1024 ; j++)
if (pg_tbl[j]&1)
k++;
printk("Pg-dir[%d] uses %d pages\n",i,k);
}
}
}
static int my_share(unsigned long address, struct task_struct * p)
{
unsigned long from;
unsigned long to;
unsigned long from_page;
unsigned long to_page;
unsigned long phys_addr;
from_page = to_page = ((address>>20) & 0xffc);
from_page += ((current->start_code>>20) & 0xffc);
to_page += ((p->start_code>>20) & 0xffc);
from = *(unsigned long *) from_page;
from &= 0xfffff000;
from_page = from + ((address>>10) & 0xffc);
phys_addr = *(unsigned long *) from_page;
to = *(unsigned long *) to_page;
if (!(to & 1))
if (to = get_free_page())
*(unsigned long *) to_page = to | 7;
else
return 0;
to &= 0xfffff000;
to_page = to + ((address>>10) & 0xffc);
if (1 & *(unsigned long *) to_page)
return 0;
/* share them: write-protect */
*(unsigned long *) to_page = *(unsigned long *) from_page;
phys_addr -= LOW_MEM;
phys_addr >>= 12;
mem_map[phys_addr]++;
return 1;
}
// rememeber to sync when munmap exit manual
long sys_mmap(void * start,size_t len,...)// int prot,int flags,int fd,off_t off
{
/* va_list args;
va_start(args,len); */
int prot=3,flags=2,fd=3,off=0;
/* prot=va_arg(args,int);flags=va_arg(args,int);fd=va_arg(args,int);
off_t off=va_arg(args,off_t);
int *ll=&len;printk("%d,%d,%d,%d,%d\n",*ll,*(ll+1),*(ll+2),*(ll+3),*(ll+4));
va_end(args);
printk("%d,%d,%d,%ld\n",prot,flags,fd,off);
*/
int block;
size_t l=len;
struct buffer_head * bh;
char *temp,*p1,*p2;
void * buf=start;
int tempprot=0;
//因为new.h和namei.c定义不一样要修改
if(prot &PROT_READ) tempprot |=4;
if(prot &PROT_EXEC) tempprot |=1;
if(prot &PROT_WRITE) tempprot |=2;
//特殊量len>0 off%4096==0 prot至少有read权限
if( (flags & MAP_SHARED)&& (flags &MAP_PRIVATE ))
{
printk("error with private and shared request.\n");
return -1;
}
if(len<=0 || off<0 || (off%PAGE_SIZE))
{
if(len<=0)
printk("the len of the wanting bytes must be positive.\n");
else
printk("offset must be the mul of 4096.\n");
return MAP_FAILED;
}
if(!(prot& PROT_READ)) return MAP_FAILED;
//permissions of the file ,搬运函数namei.c::permissions
struct m_inode *inode=current->filp[fd]->f_inode;
int mode = inode->i_mode;
if (inode->i_dev && !inode->i_nlinks)
return MAP_FAILED;
else if (current->euid==inode->i_uid)
mode >>= 6;
else if (current->egid==inode->i_gid)
mode >>= 3;
if (!(((mode & tempprot & 0007) == tempprot) || suser()))
{
printk("error permission.");
return MAP_FAILED;
}
//end of permission check
//设置用户空间的虚拟地址初始为32MB处这样写在brk增长至32MB时会有bug,我们忽略
//同时,当栈向下增长到一定程度时,也会和当前地址冲突,我们忽略
if(start<=current->brk || start>=current->start_stack-0x8000 || start==NULL)
{
if(!current->mmap_tail)
buf=0x2000000;
else
buf=current->mmap_tail->endaddr;
}
//申请mmap
struct mmap_struct *mmap=(struct mmap_struct *)malloc(sizeof(struct mmap_struct));
mmap->address=buf;
mmap->size=len;
mmap->endaddr=(unsigned long)buf+(len + 0xfff)&0xfffff000;
mmap->rw=prot;
mmap->ps=flags;
mmap->filenode=(struct m_inode*)malloc(sizeof(struct m_inode));
memcpy(mmap->filenode,inode,sizeof(struct m_inode));
if(!mmap->filenode->i_count)mmap->filenode->i_count=1;
mmap->off=off;
mmap->next=NULL;
//加入current->mmap
if(!current->mmap)
{current->mmap=current->mmap_tail=mmap;}
else {
current->mmap_tail->next=mmap;
current->mmap_tail=mmap;
}
//分配物理页
unsigned long from;
unsigned long to;
unsigned long from_page;
unsigned long to_page;
unsigned long phys_addr;
unsigned long tpage;
unsigned long address=mmap->address;
while(address<mmap->endaddr)
{
from_page = ((address>>20) & 0xffc);
from_page += ((current->start_code>>20) & 0xffc);//页目录
if(!(*(unsigned long *)from_page&1)) //页表项不存在,申请页面
if(!(tpage=get_free_page()))
{
printk("out of memory.\n");
return MAP_FAILED;
}
else
{
*(unsigned long *)from_page= tpage | 7;
mem_map[(tpage-LOW_MEM)>>12]++;
}
from=*(unsigned long *)from_page; //页表项
from &= 0xfffff000;
from_page = from + ((address>>10) & 0xffc);
if(!(*(unsigned long *)from_page & 1)) //物理页不存在
if(!(tpage=get_free_page()))
{
printk("out of memory.\n");
return MAP_FAILED;
}
else
{
*(unsigned long *)from_page= tpage | 7;
mem_map[(tpage-LOW_MEM)>>12]++;
}
*(unsigned long *)from_page &= (prot&PROT_WRITE); //分配读写权限
if(flags & MAP_SHARED)
{
//printk("i am doing sharing.");
struct task_struct ** p;
for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
if (!*p)
continue;
if (current == *p)
continue;
//在这一部分,我将current->address处的物理页复制给
//所有进程的该区域,可能导致错误,但我们忽略。
if(!my_share(address,*p))
printk("share failed at task:pid=%d",(*p)->pid);
}
}
address+=PAGE_SIZE;
}
//read and put in
if (!(block = inode->i_zone[0]))
return NULL;
if (!(bh = bread(inode->i_dev,block)))
return NULL;
temp = (char *) bh->b_data;
p1=temp+off;p2=buf;
while(l--)
put_fs_byte(*(p1++),p2++);
return buf;
}
static int my_sync(off_t pos, struct m_inode * inode, char * buf, int count)
{
int block,c;
struct buffer_head * bh;
char * p;
int i=0;
while (i<count) {
if (!(block = create_block(inode,pos/BLOCK_SIZE)))
break;
if (!(bh=bread(inode->i_dev,block)))
break;
c = pos % BLOCK_SIZE;
p = c + bh->b_data;
bh->b_dirt = 1;
c = BLOCK_SIZE-c;
if (c > count-i) c = count-i;
pos += c;
if (pos > inode->i_size) {
inode->i_size = pos;
inode->i_dirt = 1;
}
i += c;
while (c-->0)
*(p++) = get_fs_byte(buf++);
brelse(bh);
}
inode->i_mtime = CURRENT_TIME;//文件最后修改时间
inode->i_ctime = CURRENT_TIME;//i节点修改时间
return (i?i:-1);
}
int sys_munmap(void * start ,size_t size)
{
struct mmap_struct *temp=current->mmap;
struct mmap_struct *pre=NULL;
while(temp!=NULL)
{
if(temp->address==(unsigned long)start)
break;
else temp=temp->next;
pre=temp;
}
if(!temp)
{
printk("invalid address.\n");
return -1;
}
if(!pre)
{
if(current->mmap_tail==current->mmap)current->mmap_tail=NULL;
current->mmap==current->mmap->next;
}
if(temp->next=NULL)
{
current->mmap_tail=pre;
pre->next=NULL;
}
else{
pre->next=temp->next;
}
unsigned long from;
unsigned long to;
unsigned long from_page;
unsigned long to_page;
unsigned long phys_addr;
unsigned long tpage;
unsigned long address=temp->address;
if(!(temp->ps & MAP_SHARED))
goto mytag;
//若为共享,执行下面的代码
while (address < temp ->endaddr)
{
printk("addr: %lx\n",address);
from_page = ((address>>20) & 0xffc);
from_page += ((current->start_code>>20) & 0xffc);//页目录
from=*(unsigned long *)from_page; //页表项
from &= 0xfffff000;
from_page = from + ((address>>10) & 0xffc);
phys_addr = (*(unsigned long *)from_page & 0xfffff000);
//printk("addr: %lx",*(unsigned long *)from_page);
if(!(*(unsigned long *)from_page & 1))
{
printk("error of deleting page.");
return -1;
}
if(temp->ps & MAP_SHARED)
{
struct task_struct ** p;
for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
if (!*p)
continue;
if (current == *p)
continue;
to_page = ((address>>20) & 0xffc)+(((*p)->start_code>>20) & 0xffc);
to = *(unsigned long *) to_page;
if(!(to & 1))
continue;
to &= 0xfffff000;
to_page = to + ((address>>10) & 0xffc);
if(!(*(unsigned long *)to_page &1))
continue;
to = (*(unsigned long *)to_page & 0xfffff000);
if(phys_addr == to)
{
*(unsigned long *)to_page &= ~1;
mem_map[(phys_addr-LOW_MEM)>>12]--;
}
}
}
address+=PAGE_SIZE;
}
if(temp->ps & MAP_SHARED)
{
if(my_sync(temp->off,temp->filenode,(char*)(temp->address),temp->size)==-1)
return -1;
}
mytag:
//取消物理页写权限
while (address < temp ->endaddr)
{
from_page = ((address>>20) & 0xffc);
from_page += ((current->start_code>>20) & 0xffc);//页目录
from=*(unsigned long *)from_page; //页表项
from &= 0xfffff000;
from_page = from + ((address>>10) & 0xffc);
phys_addr = (*(unsigned long *)from_page & 0xfffff000);
//printk("addr: %lx",*(unsigned long *)from_page);
if(!(*(unsigned long *)from_page & 1))
{
printk("error of deleting page.");
return -1;
}
else{
*(unsigned long *)from_page &= ~1;
}
address+=PAGE_SIZE;
}
free(temp->filenode);
free(temp);
return 0;
}