/*
 *  linux/mm/memory.c
 *
 *  (C) 1991  Linus Torvalds
 */

/*
 * demand-loading started 01.12.91 - seems it is high on the list of
 * things wanted, and it should be easy to implement. - Linus
 */

/*
 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
 * pages started 02.12.91, seems to work. - Linus.
 *
 * Tested sharing by executing about 30 /bin/sh: under the old kernel it
 * would have taken more than the 6M I have free, but it worked well as
 * far as I could see.
 *
 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
 */

/*
 * NOTE(review): the nine #include directives below carry no header name in
 * the text under review - the "<...>" operands look like they were stripped.
 * Restore them from the project's own copy before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include

volatile void do_exit(long code);

/* Report memory exhaustion on the console and end the task with SIGSEGV. */
static inline volatile void oom(void)
{
	printk("out of memory\n\r");
	do_exit(SIGSEGV);
}

/* Reload %cr3 with 0 (the value this file uses for the page directory). */
#define invalidate() \
__asm__("movl %%eax,%%cr3"::"a" (0))

/* these are not to be changed without changing head.s etc */
#define LOW_MEM 0x100000			/* lowest physical address tracked in mem_map */
#define PAGING_MEMORY (15*1024*1024)		/* bytes covered by mem_map */
#define PAGING_PAGES (PAGING_MEMORY>>12)	/* number of 4096-byte pages in that range */
#define MAP_NR(addr) (((addr)-LOW_MEM)>>12)	/* physical address -> mem_map index */
#define USED 100				/* mem_map value for pages that are not available */

/* True when addr, rounded up to a page, is below start_code + end_code. */
#define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \
current->start_code + current->end_code)

/* Upper bound for valid physical page addresses; assigned by mem_init(). */
static long HIGH_MEMORY = 0;

/* Copy 1024 longs (one 4096-byte page) from 'from' to 'to'. */
#define copy_page(from,to) \
__asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024))

/* One use counter per page frame starting at LOW_MEM; 0 means free. */
static unsigned char mem_map [ PAGING_PAGES ] = {0,};

/*
 * Get physical address of first (actually last :-) free page, and mark it
 * used. If no free pages left, return 0.
 *
 * How the assembly works:
 *   std ; repne ; scasb  - with the direction flag set, scan mem_map
 *                          downwards from its last byte for a byte equal
 *                          to %al (0), looking at most PAGING_PAGES bytes.
 *   jne 1f               - nothing found: leave %eax at 0 and finish.
 *   movb $1,1(%edi)      - %edi has stepped one byte past the match, so
 *                          this sets the matching counter to 1.
 *   sall/addl            - the remaining count in %ecx is the index of the
 *                          match; index*4096 + LOW_MEM is the page address.
 *   leal/rep stosl       - store 1024 zero longs from the last long of the
 *                          page downwards, i.e. clear the whole page.
 *   movl %edx,%eax       - return the page address.
 *   1: cld               - clear the direction flag on both paths.
 *
 * NOTE(review): %ecx, %edx and %edi are modified by the code but appear
 * only as inputs / not at all in the constraint lists - confirm the
 * compiler in use tolerates that.
 */
unsigned long get_free_page(void)
{
register unsigned long __res asm("ax");

__asm__("std ; repne ; scasb\n\t"
	"jne 1f\n\t"
	"movb $1,1(%%edi)\n\t"
	"sall $12,%%ecx\n\t"
	"addl %2,%%ecx\n\t"
	"movl %%ecx,%%edx\n\t"
	"movl $1024,%%ecx\n\t"
	"leal 4092(%%edx),%%edi\n\t"
	"rep ; stosl\n\t"
	"movl %%edx,%%eax\n\t"
	"1:"
	"cld\n\t" /* by wyj */
	:"=a" (__res)
	:"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES),
	"D" (mem_map+PAGING_PAGES-1)
	);
return __res;
}

/*
 * Free a page of memory at physical address 'addr'.
Used by * 'free_page_tables()' */ void free_page(unsigned long addr) { if (addr < LOW_MEM) return; if (addr >= HIGH_MEMORY) panic("trying to free nonexistent page"); addr -= LOW_MEM; addr >>= 12; if (mem_map[addr]--) return; mem_map[addr]=0; panic("trying to free free page"); } /* * This function frees a continuos block of page tables, as needed * by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks. */ int free_page_tables(unsigned long from,unsigned long size) { unsigned long *pg_table; unsigned long * dir, nr; if (from & 0x3fffff) panic("free_page_tables called with wrong alignment"); if (!from) panic("Trying to free up swapper memory space"); size = (size + 0x3fffff) >> 22; dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */ for ( ; size-->0 ; dir++) { if (!(1 & *dir)) continue; pg_table = (unsigned long *) (0xfffff000 & *dir); for (nr=0 ; nr<1024 ; nr++) { if (1 & *pg_table) free_page(0xfffff000 & *pg_table); *pg_table = 0; pg_table++; } free_page(0xfffff000 & *dir); *dir = 0; } invalidate(); return 0; } /* * Well, here is one of the most complicated functions in mm. It * copies a range of linerar addresses by copying only the pages. * Let's hope this is bug-free, 'cause this one I don't want to debug :-) * * Note! We don't copy just any chunks of memory - addresses have to * be divisible by 4Mb (one page-directory entry), as this makes the * function easier. It's used only by fork anyway. * * NOTE 2!! When from==0 we are copying kernel space for the first * fork(). Then we DONT want to copy a full page-directory entry, as * that would lead to some serious memory waste - we just copy the * first 160 pages - 640kB. Even that is more than we need, but it * doesn't take any more memory - we don't copy-on-write in the low * 1 Mb-range, so the pages can be shared with the kernel. Thus the * special case for nr=xxxx. 
*/
/*
 * Returns 0 on success and -1 when get_free_page() cannot supply a new
 * page table; in that case the entries already installed in the
 * destination are left as they are. Panics when 'from' or 'to' is not
 * 4Mb aligned or when a destination directory entry is already present.
 */
int copy_page_tables(unsigned long from,unsigned long to,long size)
{
	unsigned long * from_page_table;
	unsigned long * to_page_table;
	unsigned long this_page;
	unsigned long * from_dir, * to_dir;
	unsigned long nr;

	if ((from&0x3fffff) || (to&0x3fffff))
		panic("copy_page_tables called with wrong alignment");
	from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
	to_dir = (unsigned long *) ((to>>20) & 0xffc);
	/* number of 4Mb directory entries to walk, rounded up */
	size = ((unsigned) (size+0x3fffff)) >> 22;
	for( ; size-->0 ; from_dir++,to_dir++) {
		if (1 & *to_dir)
			panic("copy_page_tables: already exist");
		if (!(1 & *from_dir))
			continue;
		from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
		if (!(to_page_table = (unsigned long *) get_free_page()))
			return -1;	/* Out of memory, see freeing */
		*to_dir = ((unsigned long) to_page_table) | 7;
		/* only 0xA0 (160) entries when copying from address 0 */
		nr = (from==0)?0xA0:1024;
		for ( ; nr-- > 0 ; from_page_table++,to_page_table++) {
			this_page = *from_page_table;
			if (!(1 & this_page))
				continue;
			/* clear bit 1 so the new entry is read-only */
			this_page &= ~2;
			*to_page_table = this_page;
			if (this_page > LOW_MEM) {
				/* source entry becomes read-only too; count the extra user */
				*from_page_table = this_page;
				this_page -= LOW_MEM;
				this_page >>= 12;
				mem_map[this_page]++;
			}
		}
	}
	invalidate();
	return 0;
}

/*
 * This function puts a page in memory at the wanted address.
 * It returns the physical address of the page gotten, 0 if
 * out of memory (either when trying to access page-table or
 * page.)
 *
 * The two sanity checks at the top only print a message; the function
 * carries on afterwards. A missing page table is allocated on the fly.
 */
unsigned long put_page(unsigned long page,unsigned long address)
{
	unsigned long tmp, *page_table;

/* NOTE !!! This uses the fact that _pg_dir=0 */

	if (page < LOW_MEM || page >= HIGH_MEMORY)
		printk("Trying to put page %p at %p\n",page,address);
	if (mem_map[(page-LOW_MEM)>>12] != 1)
		printk("mem_map disagrees with %p at %p\n",page,address);
	/* directory entry for 'address' */
	page_table = (unsigned long *) ((address>>20) & 0xffc);
	if ((*page_table)&1)
		page_table = (unsigned long *) (0xfffff000 & *page_table);
	else {
		if (!(tmp=get_free_page()))
			return 0;
		*page_table = tmp|7;
		page_table = (unsigned long *) tmp;
	}
	/* install the page with the low three flag bits set */
	page_table[(address>>12) & 0x3ff] = page | 7;
/* no need for invalidate */
	return page;
}

/*
 * Make the page behind *table_entry writable for the faulting task.
 * If the page lies at or above LOW_MEM and its mem_map count is 1, the
 * write bit is simply set. Otherwise a new page is obtained (oom() when
 * none is left), the old page's count is decremented when it is at or
 * above LOW_MEM, the entry is pointed at the new page and the old
 * contents are copied over.
 */
void un_wp_page(unsigned long * table_entry)
{
	unsigned long old_page,new_page;

	old_page = 0xfffff000 & *table_entry;
	if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
		*table_entry |= 2;
		invalidate();
		return;
	}
	if (!(new_page=get_free_page()))
		oom();
	if (old_page >= LOW_MEM)
		mem_map[MAP_NR(old_page)]--;
	*table_entry = new_page | 7;
	invalidate();
	copy_page(old_page,new_page);
}

/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * If it's in code space we exit with a segment error.
 * (That check is currently compiled out by the #if 0 below.)
 *
 * error_code is not used by the body.
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
#if 0
/* we cannot do this yet: the estdio library writes to code space */
/* stupid, stupid. I really want the libc.a from GNU */
	if (CODE_SPACE(address))
		do_exit(SIGSEGV);
#endif
	/* page-table base from the directory entry + offset of the entry in it */
	un_wp_page((unsigned long *)
		(((address>>10) & 0xffc) + (0xfffff000 &
		*((unsigned long *) ((address>>20) &0xffc)))));

}

/*
 * If the page containing 'address' is present but not writable, run
 * un_wp_page() on its page-table entry. Does nothing when the directory
 * entry is not present.
 */
void write_verify(unsigned long address)
{
	unsigned long page;

	if (!( (page = *((unsigned long *) ((address>>20) & 0xffc)) )&1))
		return;
	page &= 0xfffff000;
	page += ((address>>10) & 0xffc);
	if ((3 & *(unsigned long *) page) == 1)  /* non-writeable, present */
		un_wp_page((unsigned long *) page);
	return;
}

/*
 * Obtain a page from get_free_page() and install it at 'address' with
 * put_page(). If either step fails the page is released again and
 * oom() is called.
 */
void get_empty_page(unsigned long address)
{
	unsigned long tmp;

	if (!(tmp=get_free_page()) || !put_page(tmp,address)) {
		free_page(tmp);		/* 0 is ok - ignored */
		oom();
	}
}

/*
 * try_to_share() checks the page at address "address" in the task "p",
 * to see if it exists, and if it is clean. If so, share it with the current
 * task.
 *
 * NOTE! This assumes we have checked that p != current, and that they
 * share the same executable.
 *
 * Returns 1 when the page was shared, 0 otherwise. "address" is combined
 * with each task's start_code to locate the directory entries.
 */
static int try_to_share(unsigned long address, struct task_struct * p)
{
	unsigned long from;
	unsigned long to;
	unsigned long from_page;
	unsigned long to_page;
	unsigned long phys_addr;

	from_page = to_page = ((address>>20) & 0xffc);
	from_page += ((p->start_code>>20) & 0xffc);
	to_page += ((current->start_code>>20) & 0xffc);
/* is there a page-directory at from? */
	from = *(unsigned long *) from_page;
	if (!(from & 1))
		return 0;
	from &= 0xfffff000;
	from_page = from + ((address>>10) & 0xffc);
	phys_addr = *(unsigned long *) from_page;
/* is the page clean and present? */
	if ((phys_addr & 0x41) != 0x01)
		return 0;
	phys_addr &= 0xfffff000;
	if (phys_addr >= HIGH_MEMORY || phys_addr < LOW_MEM)
		return 0;
	to = *(unsigned long *) to_page;
	/* the else below pairs with the inner if: no page table and no memory */
	if (!(to & 1))
		if (to = get_free_page())
			*(unsigned long *) to_page = to | 7;
		else
			oom();
	to &= 0xfffff000;
	to_page = to + ((address>>10) & 0xffc);
	if (1 & *(unsigned long *) to_page)
		panic("try_to_share: to_page already exists");
/* share them: write-protect */
	*(unsigned long *) from_page &= ~2;
	*(unsigned long *) to_page = *(unsigned long *) from_page;
	invalidate();
	phys_addr -= LOW_MEM;
	phys_addr >>= 12;
	mem_map[phys_addr]++;
	return 1;
}

/*
 * share_page() tries to find a process that could share a page with
 * the current one. Address is the address of the wanted page relative
 * to the current data space.
 *
 * We first check if it is at all feasible by checking executable->i_count.
 * It should be >1 if there are other tasks sharing this inode.
 *
 * Returns 1 as soon as try_to_share() succeeds for some task, else 0.
 */
static int share_page(unsigned long address)
{
	struct task_struct ** p;

	if (!current->executable)
		return 0;
	if (current->executable->i_count < 2)
		return 0;
	for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
		if (!*p)
			continue;
		if (current == *p)
			continue;
		if ((*p)->executable != current->executable)
			continue;
		if (try_to_share(address,*p))
			return 1;
	}
	return 0;
}

/*
 * Supply the page that contains 'address'.
 *
 * When the task has no executable, or the page lies at or beyond
 * end_data, an empty page is installed. Otherwise the page is shared
 * with another task if share_page() manages that, or else read from the
 * executable: four blocks are looked up with bmap() and read with
 * bread_page(), and the bytes past end_data are zeroed.
 *
 * error_code is not used by the body.
 */
void do_no_page(unsigned long error_code,unsigned long address)
{
	int nr[4];
	unsigned long tmp;
	unsigned long page;
	int block,i;

	address &= 0xfffff000;
	/* offset of the page from the task's start_code */
	tmp = address - current->start_code;
	if (!current->executable || tmp >= current->end_data) {
		get_empty_page(address);
		return;
	}
	if (share_page(tmp))
		return;
	if (!(page = get_free_page()))
		oom();
/* remember that 1 block is used for header */
	block = 1 + tmp/BLOCK_SIZE;
	for (i=0 ; i<4 ; block++,i++)
		nr[i] = bmap(current->executable,block);
	bread_page(page,current->executable->i_dev,nr);
	/* number of bytes of this page that lie past end_data */
	i = tmp + 4096 - current->end_data;
	tmp = page + 4096;
	while (i-- > 0) {
		tmp--;
		*(char *)tmp = 0;
	}
	if (put_page(page,address))
		return;
	free_page(page);
	oom();
}

/* return type of do_no_page1(); its declarator follows on the next line */
void
do_no_page1(unsigned long address) { int nr[4]; unsigned long tmp; unsigned long page; int block,i; address &= 0xfffff000; tmp = address - current->start_code; if (!current->executable || tmp >= current->end_data) { get_empty_page(address); return; } if (share_page(tmp)) return; if (!(page = get_free_page())) oom(); block = 1 + tmp/BLOCK_SIZE; for (i=0 ; i<4 ; block++,i++) nr[i] = bmap(current->executable,block); bread_page(page,current->executable->i_dev,nr); i = tmp + 4096 - current->end_data; tmp = page + 4096; while (i-- > 0) { tmp--; *(char *)tmp = 0; } if (put_page(page,address)) return; free_page(page); oom(); } void mem_init(long start_mem, long end_mem) { int i; HIGH_MEMORY = end_mem; for (i=0 ; i>= 12; while (end_mem-->0) mem_map[i++]=0; } void calc_mem(void) { int i,j,k,free=0; long * pg_tbl; for(i=0 ; i=NR_OPEN || count<0 || !(file=current->filp[fd])) return -EINVAL; if (!count) return 0; verify_area(buf,count); inode = file->f_inode; file->f_pos = off; if (inode->i_pipe) return (file->f_mode&1)?read_pipe(inode,buf,count):-EIO; if (S_ISCHR(inode->i_mode)) return rw_char(READ,inode->i_zone[0],buf,count,&file->f_pos); if (S_ISBLK(inode->i_mode)) return block_read(inode->i_zone[0],&file->f_pos,buf,count); if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode)) { if (count+file->f_pos > inode->i_size) count = inode->i_size - file->f_pos; if (count<=0) return 0; return file_read(inode,file,buf,count); } printk("(Read)inode->i_mode=%06o\n\r",inode->i_mode); return -EINVAL; } //long sys_mmap(void *start,size_t len,int prot,int flags,int fd,off_t off){ //有时需要建立共享内存 所以需要将其他进程的地址空间也映射到该文件 long sys_mmap(void *start,size_t len,...){ int prot = 3,flags=1,fd=3,off = 0; int block; size_t l=len; struct buffer_head * bh; void * buf=start; int tempprot=0; struct m_inode *dir = current->filp[fd]->f_inode; size_t size=len; if (size%PAGE_SIZE) size = size/4096*4096+4096; printk("Size is %ld \n",size); //必须要有读权限 如果没有 直接返回-1 if(!(prot&PROT_READ)){ printk(" Permission denied 
\n"); return -1; } //如果是空的 由其自己分配内存 if(start==NULL){ //需要由程序分配合适的地址空间 //由于是借助链表实现 所以需要搜索来找到合适的地址进行使用 if(current->mmap==NULL) buf = 0x2000000;//直接从中间 else if(current->mmap->next==NULL) buf = current->mmap->vm_end; } //初始化 struct vmarea_struct *m1 = (struct vmarea_struct *)malloc(sizeof(struct vmarea_struct)); m1->mode = prot; m1->flag = flags; m1->next = NULL; m1->size = size; m1->vm_start = buf; m1->vm_end = (unsigned long)buf+size; m1->fd = fd; m1->off = off; if(current->mmap==NULL) current->mmap = m1; else{//插到最后一个位置 struct vmarea_struct *m2=current->mmap; for(m2;m2->next;m2=m2->next) ; m2->next = m1; } printk("buf is %ld \n",buf); //接下来进行映射等操作 //参考了try_to_share的实现 unsigned long from; unsigned long to; unsigned long from_page; unsigned long to_page; unsigned long phys_addr; unsigned long page; unsigned long tmp; unsigned long vm_address=m1->vm_start; while(vm_addressvm_end){ from_page = to_page = ((vm_address>>20)&0xffc);//逻辑的页目录项偏移 from_page +=((current->start_code>>20)&0xffc);//该进程目录项地址 //from处是否存在页目录项 否则进行申请 from = *(unsigned long *) from_page; from &=0xfffff000; from_page = from +((vm_address>>10)&0xffc);//页表项指针 phys_addr = *(unsigned long *) from_page;//页表项内容 if(!(page = get_free_page())){ printk("no free page\n"); return -1; } /* phys_addr = page |0x7; mem_map[(page-LOW_MEM)>>12]++;*/ /*if(!(*(unsigned long *) from_page & 0x1)){ if(page = get_free_page()) phys_addr = page |0x7,mem_map[(page-LOW_MEM)>>12]++; else oom(); }*/ put_page(page,vm_address); //对页面分配权限 *(unsigned long *)from_page &= (prot&PROT_WRITE)?0xffffffff:0xfffffffd; //对flags进行处理,重点处理的是MAP_SHARE,私有的话不需要进行多余的处理 //需要将所有进程共享同一页面 if(flags&MAP_SHARED){ struct task_struct ** task; for (task = &LAST_TASK ; task>&FIRST_TASK;--task){ if (!*task) continue; else if (current == *task) continue; else if(!try_to_share(vm_address,*task)) printk("current pid %d cann't share memory\n",(*task)->pid); } } vm_address += PAGE_SIZE; } char *p1=(char *)buf; mem_read(fd,off,p1,size); /*if (!(block = dir->i_zone[0])) 
return NULL; if (!(bh = bread(dir->i_dev,block))) return NULL; char *s = (char *) bh->b_data; char *p1=(char *)s; char *p2=(char *)buf; while(len--) put_fs_byte(*(p1++),p2++);*/ return buf; } /*取消映射 取消映射分为以下几步: 1.从当前进程的虚拟地址结构出发,找到要取消映射的虚拟地址 2.判断权限 若是具有可写权限 需要将内容写入到对应的文件之中 可参考sys_wirte的实现 3.判断该虚拟地址是否是所有共享 4.清空该页面的内容 5.取消该进程/所有进程的读写权限 要注意的是: 该结构是一个单向链表 所以链表删除的时候需要谨慎一些 */ //取消进程p的共享 与try_share很类似 只是少了几步 static int cancle_share(unsigned long address, struct task_struct * p) { unsigned long from; unsigned long from_page; unsigned long phys_addr; from_page = ((address>>20) & 0xffc); from_page += ((p->start_code>>20) & 0xffc); /* is there a page-directory at from? */ from = *(unsigned long *) from_page; from &= 0xfffff000; from_page = from + ((address>>10) & 0xffc); phys_addr = *(unsigned long *) from_page; /* is the page clean and present? */ phys_addr &= 0xfffff000; if (phys_addr >= HIGH_MEMORY || phys_addr < LOW_MEM) return 0; /* share them: write-protect */ *(unsigned long *) from_page &= ~1; phys_addr -= LOW_MEM; phys_addr >>= 12; mem_map[phys_addr]--; return 1; } //如果页面具有写权限,很可能被修改过,所以需要将映射页面内容写入文件 //dir是映射文件i节点,off是从文件那个地方开始写,buf是要写到文件的内容,size是写多少字节 int mmap_wirte(int fd,off_t pos,char *buf,int count){ struct file * file; struct m_inode * inode; if (fd>=NR_OPEN || count <0 || !(file=current->filp[fd])) return -EINVAL; if (!count) return 0; inode=file->f_inode; file->f_pos = pos; if (inode->i_pipe) return (file->f_mode&2)?write_pipe(inode,buf,count):-EIO; if (S_ISCHR(inode->i_mode)) return rw_char(WRITE,inode->i_zone[0],buf,count,&file->f_pos); if (S_ISBLK(inode->i_mode)) return block_write(inode->i_zone[0],&file->f_pos,buf,count); if (S_ISREG(inode->i_mode)) return file_write(inode,file,buf,count); printk("(Write)inode->i_mode=%06o\n\r",inode->i_mode); return -EINVAL; } //将page页面的内容保存到对应的文件 int sys_munmap(void *address1,size_t len){ struct vmarea_struct *m1 = current->mmap; struct vmarea_struct *pre = NULL; //寻找对应的虚拟地址结构 if(!m1){ printk("No vm_aread struct address 
is NULL \n"); return 0; } //b遍历所有的结构来进行U型你找 for(m1;m1;m1=m1->next) if(m1->vm_start==address1){ pre = m1; break; } //需要判断一下是否是真的找到了 if(m1->vm_start!=address1){ printk("address is wrong \n"); return 0; } //判断权限 int j = 0; char *buf = (char *)address1; char *s1 = (char *)address1; if(m1->mode&PROT_WRITE){ if(!(mmap_wirte(m1->fd,m1->off,buf,m1->size))){ printk("wirte is wrong \n"); return -1; } } //进行页面的删除 其实很简单 只需要pre只能m1->next即可 pre->next = m1->next; //首先释放掉当前页面 unsigned long from; unsigned long to; unsigned long from_page; unsigned long to_page; unsigned long phys_addr; unsigned long tpage; unsigned long address=m1->vm_start; if(len%PAGE_SIZE) len = len/4096*4096+PAGE_SIZE; while(addressvm_start&&addressvm_end){ from_page = to_page = ((address>>20)&0xffc);//逻辑的页目录项偏移 from_page +=((current->start_code>>20)&0xffc);//该进程目录项地址 from = *(unsigned long *) from_page; from &=0xfffff000; from_page = from +((address>>10)&0xffc);//页表项指针 phys_addr = *(unsigned long *) from_page;//页表项内容 //主要是针对页表项进行修改 *(unsigned long *)from_page &= ~1; address += PAGE_SIZE; } //接下来对共享进行处理 //如果是共享的话 //当前进程已经处理过了 着重处理的是对应页面的进程 address = m1->vm_start; if(m1->flag&MAP_SHARED){ //其实与mmap设置共享内存类似,如何设置出来的就如何取消掉 //仍旧是参照try_to_share的实现 struct task_struct **p; for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) { //当前进程已经处理过了 就没必要再处理了 if(current == *p) continue; cancle_share(address,*p); } } //free(m1);//释放掉结构体占用的内存 return 0; }