Compare commits

...

9 Commits
v0.0 ... master

Binary file not shown.

Binary file not shown.

@ -0,0 +1,446 @@
/*
* linux/fs/buffer.c
*
* (C) 1991 Linus Torvalds
*/
/*
* 'buffer.c' implements the buffer-cache functions. Race-conditions have
* been avoided by NEVER letting a interrupt change a buffer (except for the
* data, of course), but instead letting the caller do it. NOTE! As interrupts
* can wake up a caller, some cli-sti sequences are needed to check for
* sleep-on-calls. These should be extremely quick, though (I hope).
*/
/*
* NOTE! There is one discordant note here: checking floppies for
* disk change. This is where it fits best, I think, as it should
* invalidate changed floppy-disk-caches.
*/
#include <stdarg.h>
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <asm/system.h>
#include <asm/io.h>
#include <linux/fs.h>
#include <asm/segment.h>
#define NAME_MAX 255
extern void put_super(int dev);
extern void invalidate_inodes(int dev);
extern int end;
struct buffer_head * start_buffer = (struct buffer_head *) &end;
struct buffer_head * hash_table[NR_HASH];
static struct buffer_head * free_list;
static struct task_struct * buffer_wait = NULL;
int NR_BUFFERS = 0;
/*coded by dj*/
struct linux_dirent
{
long d_ino; /* inode number 索引节点号 */
off_t d_off; /* offset to this dirent 在目录文件中的偏移 */
unsigned short d_reclen; /* length of this d_name 文件名长 */
char d_name[NAME_MAX+1]; /* file name (null-terminated) 文件名最长255字符 */
};
/*coded by dj*/
static inline void wait_on_buffer(struct buffer_head * bh)
{
cli();
while (bh->b_lock)
sleep_on(&bh->b_wait);
sti();
}
int sys_sync(void)
{
int i;
struct buffer_head * bh;
sync_inodes(); /* write out inodes into buffers */
bh = start_buffer;
for (i=0 ; i<NR_BUFFERS ; i++,bh++) {
wait_on_buffer(bh);
if (bh->b_dirt)
ll_rw_block(WRITE,bh);
}
return 0;
}
int sync_dev(int dev)
{
int i;
struct buffer_head * bh;
bh = start_buffer;
for (i=0 ; i<NR_BUFFERS ; i++,bh++) {
if (bh->b_dev != dev)
continue;
wait_on_buffer(bh);
if (bh->b_dev == dev && bh->b_dirt)
ll_rw_block(WRITE,bh);
}
sync_inodes();
bh = start_buffer;
for (i=0 ; i<NR_BUFFERS ; i++,bh++) {
if (bh->b_dev != dev)
continue;
wait_on_buffer(bh);
if (bh->b_dev == dev && bh->b_dirt)
ll_rw_block(WRITE,bh);
}
return 0;
}
void inline invalidate_buffers(int dev)
{
int i;
struct buffer_head * bh;
bh = start_buffer;
for (i=0 ; i<NR_BUFFERS ; i++,bh++) {
if (bh->b_dev != dev)
continue;
wait_on_buffer(bh);
if (bh->b_dev == dev)
bh->b_uptodate = bh->b_dirt = 0;
}
}
/*
* This routine checks whether a floppy has been changed, and
* invalidates all buffer-cache-entries in that case. This
* is a relatively slow routine, so we have to try to minimize using
* it. Thus it is called only upon a 'mount' or 'open'. This
* is the best way of combining speed and utility, I think.
* People changing diskettes in the middle of an operation deserve
* to loose :-)
*
* NOTE! Although currently this is only for floppies, the idea is
* that any additional removable block-device will use this routine,
* and that mount/open needn't know that floppies/whatever are
* special.
*/
void check_disk_change(int dev)
{
int i;
if (MAJOR(dev) != 2)
return;
if (!floppy_change(dev & 0x03))
return;
for (i=0 ; i<NR_SUPER ; i++)
if (super_block[i].s_dev == dev)
put_super(super_block[i].s_dev);
invalidate_inodes(dev);
invalidate_buffers(dev);
}
#define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH)
#define hash(dev,block) hash_table[_hashfn(dev,block)]
static inline void remove_from_queues(struct buffer_head * bh)
{
/* remove from hash-queue */
if (bh->b_next)
bh->b_next->b_prev = bh->b_prev;
if (bh->b_prev)
bh->b_prev->b_next = bh->b_next;
if (hash(bh->b_dev,bh->b_blocknr) == bh)
hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
/* remove from free list */
if (!(bh->b_prev_free) || !(bh->b_next_free))
panic("Free block list corrupted");
bh->b_prev_free->b_next_free = bh->b_next_free;
bh->b_next_free->b_prev_free = bh->b_prev_free;
if (free_list == bh)
free_list = bh->b_next_free;
}
static inline void insert_into_queues(struct buffer_head * bh)
{
/* put at end of free list */
bh->b_next_free = free_list;
bh->b_prev_free = free_list->b_prev_free;
free_list->b_prev_free->b_next_free = bh;
free_list->b_prev_free = bh;
/* put the buffer in new hash-queue if it has a device */
bh->b_prev = NULL;
bh->b_next = NULL;
if (!bh->b_dev)
return;
bh->b_next = hash(bh->b_dev,bh->b_blocknr);
hash(bh->b_dev,bh->b_blocknr) = bh;
bh->b_next->b_prev = bh;
}
static struct buffer_head * find_buffer(int dev, int block)
{
struct buffer_head * tmp;
for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
if (tmp->b_dev==dev && tmp->b_blocknr==block)
return tmp;
return NULL;
}
/*
* Why like this, I hear you say... The reason is race-conditions.
* As we don't lock buffers (unless we are readint them, that is),
* something might happen to it while we sleep (ie a read-error
* will force it bad). This shouldn't really happen currently, but
* the code is ready.
*/
struct buffer_head * get_hash_table(int dev, int block)
{
struct buffer_head * bh;
for (;;) {
if (!(bh=find_buffer(dev,block)))
return NULL;
bh->b_count++;
wait_on_buffer(bh);
if (bh->b_dev == dev && bh->b_blocknr == block)
return bh;
bh->b_count--;
}
}
/*
* Ok, this is getblk, and it isn't very clear, again to hinder
* race-conditions. Most of the code is seldom used, (ie repeating),
* so it should be much more efficient than it looks.
*
* The algoritm is changed: hopefully better, and an elusive bug removed.
*/
#define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
struct buffer_head * getblk(int dev,int block)
{
struct buffer_head * tmp, * bh;
repeat:
if (bh = get_hash_table(dev,block))
return bh;
tmp = free_list;
do {
if (tmp->b_count)
continue;
if (!bh || BADNESS(tmp)<BADNESS(bh)) {
bh = tmp;
if (!BADNESS(tmp))
break;
}
/* and repeat until we find something good */
} while ((tmp = tmp->b_next_free) != free_list);
if (!bh) {
sleep_on(&buffer_wait);
goto repeat;
}
wait_on_buffer(bh);
if (bh->b_count)
goto repeat;
while (bh->b_dirt) {
sync_dev(bh->b_dev);
wait_on_buffer(bh);
if (bh->b_count)
goto repeat;
}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
if (find_buffer(dev,block))
goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
bh->b_count=1;
bh->b_dirt=0;
bh->b_uptodate=0;
remove_from_queues(bh);
bh->b_dev=dev;
bh->b_blocknr=block;
insert_into_queues(bh);
return bh;
}
void brelse(struct buffer_head * buf)
{
if (!buf)
return;
wait_on_buffer(buf);
if (!(buf->b_count--))
panic("Trying to free free buffer");
wake_up(&buffer_wait);
}
/*
* bread() reads a specified block and returns the buffer that contains
* it. It returns NULL if the block was unreadable.
*/
struct buffer_head * bread(int dev,int block)
{
struct buffer_head * bh;
if (!(bh=getblk(dev,block)))
panic("bread: getblk returned NULL\n");
if (bh->b_uptodate)
return bh;
ll_rw_block(READ,bh);
wait_on_buffer(bh);
if (bh->b_uptodate)
return bh;
brelse(bh);
return NULL;
}
#define COPYBLK(from,to) \
__asm__("cld\n\t" \
"rep\n\t" \
"movsl\n\t" \
::"c" (BLOCK_SIZE/4),"S" (from),"D" (to) \
)
/*
* bread_page reads four buffers into memory at the desired address. It's
* a function of its own, as there is some speed to be got by reading them
* all at the same time, not waiting for one to be read, and then another
* etc.
*/
void bread_page(unsigned long address,int dev,int b[4])
{
struct buffer_head * bh[4];
int i;
for (i=0 ; i<4 ; i++)
if (b[i]) {
if (bh[i] = getblk(dev,b[i]))
if (!bh[i]->b_uptodate)
ll_rw_block(READ,bh[i]);
} else
bh[i] = NULL;
for (i=0 ; i<4 ; i++,address += BLOCK_SIZE)
if (bh[i]) {
wait_on_buffer(bh[i]);
if (bh[i]->b_uptodate)
COPYBLK((unsigned long) bh[i]->b_data,address);
brelse(bh[i]);
}
}
/*
* Ok, breada can be used as bread, but additionally to mark other
* blocks for reading as well. End the argument list with a negative
* number.
*/
struct buffer_head * breada(int dev,int first, ...)
{
va_list args;
struct buffer_head * bh, *tmp;
va_start(args,first);
if (!(bh=getblk(dev,first)))
panic("bread: getblk returned NULL\n");
if (!bh->b_uptodate)
ll_rw_block(READ,bh);
while ((first=va_arg(args,int))>=0) {
tmp=getblk(dev,first);
if (tmp) {
if (!tmp->b_uptodate)
ll_rw_block(READA,bh);
tmp->b_count--;
}
}
va_end(args);
wait_on_buffer(bh);
if (bh->b_uptodate)
return bh;
brelse(bh);
return (NULL);
}
void buffer_init(long buffer_end)
{
struct buffer_head * h = start_buffer;
void * b;
int i;
if (buffer_end == 1<<20)
b = (void *) (640*1024);
else
b = (void *) buffer_end;
while ( (b -= BLOCK_SIZE) >= ((void *) (h+1)) ) {
h->b_dev = 0;
h->b_dirt = 0;
h->b_count = 0;
h->b_lock = 0;
h->b_uptodate = 0;
h->b_wait = NULL;
h->b_next = NULL;
h->b_prev = NULL;
h->b_data = (char *) b;
h->b_prev_free = h-1;
h->b_next_free = h+1;
h++;
NR_BUFFERS++;
if (b == (void *) 0x100000)
b = (void *) 0xA0000;
}
h--;
free_list = start_buffer;
free_list->b_prev_free = h;
h->b_next_free = free_list;
for (i=0;i<NR_HASH;i++)
hash_table[i]=NULL;
}
/*coded by dj */
int sys_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count)
{
struct file *file;
struct m_inode *f_inode;
struct linux_dirent my_dirent;
struct buffer_head *block;
struct dir_entry *dir; /*目录项地址*/
char *buf;
int byte = 0; /*读取字节数*/
file = current->filp[fd]; /*读取的文件*/
if (!count)
return -1; /*失败返回-1*/
f_inode = file->f_inode; /*文件的索引节点*/
block = bread(f_inode->i_dev, f_inode->i_zone[0]);
int k; /*已读到的位置*/
int len_dir = sizeof(struct dir_entry);
int len_dirent = sizeof(struct linux_dirent);
for (k = 0; k < f_inode->i_size; k += len_dir)
{
if (byte + len_dirent >= count) /*到目录结尾返回0*/
return 0;
dir = (struct dir_entry *)(block->b_data + k); /*获取目录项*/
if (dir->inode) /*将要写入缓冲区的数据存好*/
{
my_dirent.d_ino = dir->inode;
int i;
for (i = 0; i < NAME_LEN; i++)
my_dirent.d_name[i] = dir->name[i];
my_dirent.d_off = 0;
my_dirent.d_reclen = sizeof(my_dirent);
buf = &my_dirent;
for (i = 0; i < my_dirent.d_reclen; i++)
{
put_fs_byte(*(buf + i), ((char *)dirp) + i + byte); /*将用户态数据传入内核*/
}
byte += my_dirent.d_reclen;
}
else
continue; /*空目录跳过*/
}
return byte;
}
/*coded by dj */

723
exec.c

@ -0,0 +1,723 @@
/*
* linux/fs/exec.c
*
* (C) 1991 Linus Torvalds
*/
/*
* #!-checking implemented by tytso.
*/
/*
* Demand-loading implemented 01.12.91 - no need to read anything but
* the header into memory. The inode of the executable is put into
* "current->executable", and page faults do the actual loading. Clean.
*
* Once more I can proudly say that linux stood up to being changed: it
* was less than 2 hours work to get demand-loading completely implemented.
*/
#include <errno.h>
#include <string.h>
#include <sys/stat.h>
#include <a.out.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/segment.h>
extern int sys_exit(int exit_code);
extern int sys_close(int fd);
/*
* MAX_ARG_PAGES defines the number of pages allocated for arguments
* and envelope for the new program. 32 should suffice, this gives
* a maximum env+arg of 128kB !
*/
#define MAX_ARG_PAGES 32
int sys_uselib()
{
return -ENOSYS;
}
/*
* create_tables() parses the env- and arg-strings in new user
* memory and creates the pointer tables from them, and puts their
* addresses on the "stack", returning the new stack pointer value.
*/
static unsigned long * create_tables(char * p,int argc,int envc)
{
unsigned long *argv,*envp;
unsigned long * sp;
sp = (unsigned long *) (0xfffffffc & (unsigned long) p);
sp -= envc+1;
envp = sp;
sp -= argc+1;
argv = sp;
put_fs_long((unsigned long)envp,--sp);
put_fs_long((unsigned long)argv,--sp);
put_fs_long((unsigned long)argc,--sp);
while (argc-->0) {
put_fs_long((unsigned long) p,argv++);
while (get_fs_byte(p++)) /* nothing */ ;
}
put_fs_long(0,argv);
while (envc-->0) {
put_fs_long((unsigned long) p,envp++);
while (get_fs_byte(p++)) /* nothing */ ;
}
put_fs_long(0,envp);
return sp;
}
/*
* count() counts the number of arguments/envelopes
*/
static int count(char ** argv)
{
int i=0;
char ** tmp;
if (tmp = argv)
while (get_fs_long((unsigned long *) (tmp++)))
i++;
return i;
}
/*
* 'copy_string()' copies argument/envelope strings from user
* memory to free pages in kernel mem. These are in a format ready
* to be put directly into the top of new user memory.
*
* Modified by TYT, 11/24/91 to add the from_kmem argument, which specifies
* whether the string and the string array are from user or kernel segments:
*
* from_kmem argv * argv **
* 0 user space user space
* 1 kernel space user space
* 2 kernel space kernel space
*
* We do this by playing games with the fs segment register. Since it
* it is expensive to load a segment register, we try to avoid calling
* set_fs() unless we absolutely have to.
*/
static unsigned long copy_strings(int argc,char ** argv,unsigned long *page,
unsigned long p, int from_kmem)
{
char *tmp, *pag;
int len, offset = 0;
unsigned long old_fs, new_fs;
if (!p)
return 0; /* bullet-proofing */
new_fs = get_ds();
old_fs = get_fs();
if (from_kmem==2)
set_fs(new_fs);
while (argc-- > 0) {
if (from_kmem == 1)
set_fs(new_fs);
if (!(tmp = (char *)get_fs_long(((unsigned long *)argv)+argc)))
panic("argc is wrong");
if (from_kmem == 1)
set_fs(old_fs);
len=0; /* remember zero-padding */
do {
len++;
} while (get_fs_byte(tmp++));
if (p-len < 0) { /* this shouldn't happen - 128kB */
set_fs(old_fs);
return 0;
}
while (len) {
--p; --tmp; --len;
if (--offset < 0) {
offset = p % PAGE_SIZE;
if (from_kmem==2)
set_fs(old_fs);
if (!(pag = (char *) page[p/PAGE_SIZE]) &&
!(pag = (char *) (page[p/PAGE_SIZE] =
(unsigned long *) get_free_page())))
return 0;
if (from_kmem==2)
set_fs(new_fs);
}
*(pag + offset) = get_fs_byte(tmp);
}
}
if (from_kmem==2)
set_fs(old_fs);
return p;
}
static unsigned long change_ldt(unsigned long text_size,unsigned long * page)
{
unsigned long code_limit,data_limit,code_base,data_base;
int i;
code_limit = text_size+PAGE_SIZE -1;
code_limit &= 0xFFFFF000;
data_limit = 0x4000000;
code_base = get_base(current->ldt[1]);
data_base = code_base;
set_base(current->ldt[1],code_base);
set_limit(current->ldt[1],code_limit);
set_base(current->ldt[2],data_base);
set_limit(current->ldt[2],data_limit);
/* make sure fs points to the NEW data segment */
__asm__("pushl $0x17\n\tpop %%fs"::);
data_base += data_limit;
for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) {
data_base -= PAGE_SIZE;
if (page[i])
put_page(page[i],data_base);
}
return data_limit;
}
/*
* 'do_execve()' executes a new program.
*/
int do_execve(unsigned long * eip,long tmp,char * filename,
char ** argv, char ** envp)
{
struct m_inode * inode;
struct buffer_head * bh;
struct exec ex;
unsigned long page[MAX_ARG_PAGES];
int i,argc,envc;
int e_uid, e_gid;
int retval;
int sh_bang = 0;
unsigned long p=PAGE_SIZE*MAX_ARG_PAGES-4;
if ((0xffff & eip[1]) != 0x000f)
panic("execve called from supervisor mode");
for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
page[i]=0;
if (!(inode=namei(filename))) /* get executables inode */
return -ENOENT;
argc = count(argv);
envc = count(envp);
restart_interp:
if (!S_ISREG(inode->i_mode)) { /* must be regular file */
retval = -EACCES;
goto exec_error2;
}
i = inode->i_mode;
e_uid = (i & S_ISUID) ? inode->i_uid : current->euid;
e_gid = (i & S_ISGID) ? inode->i_gid : current->egid;
if (current->euid == inode->i_uid)
i >>= 6;
else if (current->egid == inode->i_gid)
i >>= 3;
if (!(i & 1) &&
!((inode->i_mode & 0111) && suser())) {
retval = -ENOEXEC;
goto exec_error2;
}
if (!(bh = bread(inode->i_dev,inode->i_zone[0]))) {
retval = -EACCES;
goto exec_error2;
}
ex = *((struct exec *) bh->b_data); /* read exec-header */
if ((bh->b_data[0] == '#') && (bh->b_data[1] == '!') && (!sh_bang)) {
/*
* This section does the #! interpretation.
* Sorta complicated, but hopefully it will work. -TYT
*/
char buf[1023], *cp, *interp, *i_name, *i_arg;
unsigned long old_fs;
strncpy(buf, bh->b_data+2, 1022);
brelse(bh);
iput(inode);
buf[1022] = '\0';
if (cp = strchr(buf, '\n')) {
*cp = '\0';
for (cp = buf; (*cp == ' ') || (*cp == '\t'); cp++);
}
if (!cp || *cp == '\0') {
retval = -ENOEXEC; /* No interpreter name found */
goto exec_error1;
}
interp = i_name = cp;
i_arg = 0;
for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
if (*cp == '/')
i_name = cp+1;
}
if (*cp) {
*cp++ = '\0';
i_arg = cp;
}
/*
* OK, we've parsed out the interpreter name and
* (optional) argument.
*/
if (sh_bang++ == 0) {
p = copy_strings(envc, envp, page, p, 0);
p = copy_strings(--argc, argv+1, page, p, 0);
}
/*
* Splice in (1) the interpreter's name for argv[0]
* (2) (optional) argument to interpreter
* (3) filename of shell script
*
* This is done in reverse order, because of how the
* user environment and arguments are stored.
*/
p = copy_strings(1, &filename, page, p, 1);
argc++;
if (i_arg) {
p = copy_strings(1, &i_arg, page, p, 2);
argc++;
}
p = copy_strings(1, &i_name, page, p, 2);
argc++;
if (!p) {
retval = -ENOMEM;
goto exec_error1;
}
/*
* OK, now restart the process with the interpreter's inode.
*/
old_fs = get_fs();
set_fs(get_ds());
if (!(inode=namei(interp))) { /* get executables inode */
set_fs(old_fs);
retval = -ENOENT;
goto exec_error1;
}
set_fs(old_fs);
goto restart_interp;
}
brelse(bh);
if (N_MAGIC(ex) != ZMAGIC || ex.a_trsize || ex.a_drsize ||
ex.a_text+ex.a_data+ex.a_bss>0x3000000 ||
inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) {
retval = -ENOEXEC;
goto exec_error2;
}
if (N_TXTOFF(ex) != BLOCK_SIZE) {
printk("%s: N_TXTOFF != BLOCK_SIZE. See a.out.h.", filename);
retval = -ENOEXEC;
goto exec_error2;
}
if (!sh_bang) {
p = copy_strings(envc,envp,page,p,0);
p = copy_strings(argc,argv,page,p,0);
if (!p) {
retval = -ENOMEM;
goto exec_error2;
}
}
/* OK, This is the point of no return */
if (current->executable)
iput(current->executable);
current->executable = inode;
for (i=0 ; i<32 ; i++)
current->sigaction[i].sa_handler = NULL;
for (i=0 ; i<NR_OPEN ; i++)
if ((current->close_on_exec>>i)&1)
sys_close(i);
current->close_on_exec = 0;
free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
if (last_task_used_math == current)
last_task_used_math = NULL;
current->used_math = 0;
p += change_ldt(ex.a_text,page)-MAX_ARG_PAGES*PAGE_SIZE;
p = (unsigned long) create_tables((char *)p,argc,envc);
current->brk = ex.a_bss +
(current->end_data = ex.a_data +
(current->end_code = ex.a_text));
current->start_stack = p & 0xfffff000;
current->euid = e_uid;
current->egid = e_gid;
i = ex.a_text+ex.a_data;
while (i&0xfff)
put_fs_byte(0,(char *) (i++));
eip[0] = ex.a_entry; /* eip, magic happens :-) */
eip[3] = p; /* stack pointer */
return 0;
exec_error2:
iput(inode);
exec_error1:
for (i=0 ; i<MAX_ARG_PAGES ; i++)
free_page(page[i]);
return(retval);
}
/*
int execve2(const char *path, char * argv[], char * envp[]);
path:
argv:
envp:
-1
* coded by dj
*/
int do_execve2(unsigned long * eip,long tmp,char * filename,
char ** argv, char ** envp)
{
struct m_inode * inode;
struct buffer_head * bh;
struct exec ex;
unsigned long page[MAX_ARG_PAGES]; // 参数和环境串空间页面指针数组。
int i,argc,envc;
int e_uid, e_gid; // 有效用户 ID 和有效组 ID。
int retval;
int sh_bang = 0; // 控制是否需要执行脚本程序。
unsigned long p=PAGE_SIZE*MAX_ARG_PAGES-4; // p 指向参数和环境空间的最后部
unsigned long end_ad,address;
/*
128KB32
p 128KB 1
p 128KB
eip[1] CS
0x000f CS
0x0008
eip[1] 128KB
i argc
envc
*/
if ((0xffff & eip[1]) != 0x000f)
panic("execve called from supervisor mode");
for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
page[i]=0;
if (!(inode=namei(filename))) /* get executables inode */
return -ENOENT;
argc = count(argv);
envc = count(envp); //参数个数
restart_interp:
if (!S_ISREG(inode->i_mode)) { /* must be regular file */
retval = -EACCES;
goto exec_error2; //若不是常规文件则置出错码,跳转
}
/*
i
i i
--IDset-user_id--ID
set-group-id
rootpasswd
set-user-id IDeuid
ID euid
set-group-id IDegid
ID egid
e_uid e_gid
*/
i = inode->i_mode;
e_uid = (i & S_ISUID) ? inode->i_uid : current->euid;
e_gid = (i & S_ISGID) ? inode->i_gid : current->egid;
/*
euid egid 访
i 6 3 宿访
使 3
访 3 访
3
0
exec_error2 退
*/
if (current->euid == inode->i_uid)
i >>= 6;
else if (current->egid == inode->i_gid)
i >>= 3;
if (!(i & 1) &&
!((inode->i_mode & 0111) && suser())) {
retval = -ENOEXEC;
goto exec_error2;
}
/*
shell
ex
'#!'
shell
#/bin/bash
'#!'
128KB
i
sh_bang
*/
if (!(bh = bread(inode->i_dev,inode->i_zone[0]))) {
retval = -EACCES;
goto exec_error2;
}
ex = *((struct exec *) bh->b_data); /* read exec-header */
if ((bh->b_data[0] == '#') && (bh->b_data[1] == '!') && (!sh_bang)) {
/*
* This section does the #! interpretation.
* Sorta complicated, but hopefully it will work. -TYT
*/
char buf[1023], *cp, *interp, *i_name, *i_arg;
unsigned long old_fs;
/*
1 '#!'
buf/bin/sh
buf
*/
strncpy(buf, bh->b_data+2, 1022);
brelse(bh);
iput(inode);
buf[1022] = '\0';
if (cp = strchr(buf, '\n')) {
*cp = '\0';
for (cp = buf; (*cp == ' ') || (*cp == '\t'); cp++);
}
if (!cp || *cp == '\0') {
retval = -ENOEXEC; /* No interpreter name found */
goto exec_error1;
}
/*
i_name
i_arg
*/
interp = i_name = cp;
i_arg = 0;
for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
if (*cp == '/')
i_name = cp+1;
}
if (*cp) {
*cp++ = '\0';
i_arg = cp;
}
/*
* OK, we've parsed out the interpreter name and
* (optional) argument.
*/
/*
i_name i_arg
sh_bang
envc argc-1
*/
if (sh_bang++ == 0) {
p = copy_strings(envc, envp, page, p, 0);
p = copy_strings(--argc, argv+1, page, p, 0);
}
/*
* Splice in (1) the interpreter's name for argv[0]
* (2) (optional) argument to interpreter
* (3) filename of shell script
*
* This is done in reverse order, because of how the
* user environment and arguments are stored.
*/
/*
filename copy_strings()
1 copy_strings() 2
*/
p = copy_strings(1, &filename, page, p, 1);
argc++;
if (i_arg) {
p = copy_strings(1, &i_arg, page, p, 2);
argc++;
}
p = copy_strings(1, &i_name, page, p, 2);
argc++;
if (!p) {
retval = -ENOMEM;
goto exec_error1;
}
/*
* OK, now restart the process with the interpreter's inode.
*/
// OK现在使用解释程序的 i 节点重启进程。
/*
i
i 使 namei()使
FS namei()
FS namei() FS
restart_interp --
*/
old_fs = get_fs();
set_fs(get_ds());
if (!(inode=namei(interp))) { /* get executables inode */
set_fs(old_fs);
retval = -ENOENT;
goto exec_error1;
}
set_fs(old_fs);
goto restart_interp;
}
/*
ex
ex ZMAGIC
0
ZMAGIC
0( + + ) 50MB
( + + + )
*/
brelse(bh);
if (N_MAGIC(ex) != ZMAGIC || ex.a_trsize || ex.a_drsize ||
ex.a_text+ex.a_data+ex.a_bss>0x3000000 ||
inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) {
retval = -ENOEXEC;
goto exec_error2;
}
/*
1 1024
Demand paging
*/
if (N_TXTOFF(ex) != BLOCK_SIZE) {
printk("%s: N_TXTOFF != BLOCK_SIZE. See a.out.h.", filename);
retval = -ENOEXEC;
goto exec_error2;
}
/*
sh_bang
sh_bang
sh_bang p
p 1 p 128KB
p=0
*/
if (!sh_bang) {
p = copy_strings(envc,envp,page,p,0);
p = copy_strings(argc,argv,page,p,0);
if (!p) {
retval = -ENOMEM;
goto exec_error2;
}
}
/* OK, This is the point of no return */
/*
使
使
LDT
e_uid e_gid
eip[]退
copy_strings()使 get_free_page()
change_ldt()使 put_page() reate_tables()
*/
// 这里我们首先放回进程原执行程序的 i 节点,并且让进程 executable 字段指向新执行文件的 i
// 节点。然后复位原进程的所有信号处理句柄,但对于 SIG_IGN 句柄无须复位。再根据设定的执行
// 时关闭文件句柄close_on_exec位图标志关闭指定的打开文件并复位该标志。
if (current->executable)
iput(current->executable);
current->executable = inode;
for (i=0 ; i<32 ; i++)
current->sigaction[i].sa_handler = NULL;
for (i=0 ; i<NR_OPEN ; i++)
if ((current->close_on_exec>>i)&1)
sys_close(i);
current->close_on_exec = 0;
// 然后根据当前进程指定的基地址和限长,释放原来程序的代码段和数据段所对应的内存页表指定
// 的物理内存页面及页表本身。此时新执行文件并没有占用主内存区任何页面,因此在处理器真正
// 运行新执行文件代码时就会引起缺页异常中断。此时内存管理程序即会执行缺页处理而为新执行
// 文件申请内存页面和设置相关页表项,并且把相关执行文件页面读入内存中。如果“上次任务使
// 用了协处理器”指向的是当前进程,则将其置空,并复位使用了协处理器的标志。
free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
if (last_task_used_math == current)
last_task_used_math = NULL;
current->used_math = 0;
// 然后我们根据新执行文件头结构中的代码长度字段 a_text 的值,来修改局部表中描述符基址和
// 段限长,并将 128KB 的参数和环境空间页面放置在数据段末端。执行下面语句之后p 此时更改
// 成以数据段起始处为原点的偏移值,但仍指向参数和环境空间数据开始处,即已转换成为栈指针
// 值。 然后调用内部函数 create_tables() 在栈空间中创建环境和参数变量指针表,供程序的
// main()作为参数使用,并返回该栈指针。
p += change_ldt(ex.a_text,page)-MAX_ARG_PAGES*PAGE_SIZE;
p = (unsigned long) create_tables((char *)p,argc,envc);
// 接着再修改进程各字段值为新执行文件的信息。即令进程任务结构代码尾字段 end_code 等于执
// 行文件的代码段长度 a_text数据尾字段 end_data 等于执行文件的代码段长度加数据段长度
// a_data + a_text并令进程堆结尾字段 brk = a_text + a_data + a_bss。 brk 用于指明
// 进程当前数据段(包括未初始化数据部分)末端位置,供内核为进程分配内存时指定分配开始位
// 置。然后设置进程栈开始字段为栈指针所在页面,并重新设置进程的有效用户 id 和有效组 id。
current->brk = ex.a_bss +
(current->end_data = ex.a_data +
(current->end_code = ex.a_text));
current->start_stack = p & 0xfffff000;
current->euid = e_uid;
current->egid = e_gid;
i = ex.a_text+ex.a_data;
while (i&0xfff)
put_fs_byte(0,(char *) (i++));
//直接进行缺页处理
end_ad=current->start_code+current->end_data;
for(address=current->start_code;address<=end_ad;address+=4096)
my_do_no_page(address);
// 最后将原调用系统中断的程序在堆栈上的代码指针替换为指向新执行程序的入口点,并将栈指针
// 替换为新执行文件的栈指针。此后返回指令将弹出这些栈数据并使得 CPU 去执行新执行文件,因
// 此不会返回到原调用系统中断的程序中去了。
eip[0] = ex.a_entry; /* eip, magic happens :-) */
eip[3] = p; /* stack pointer */
return 0;
exec_error2:
iput(inode);
exec_error1:
for (i=0 ; i<MAX_ARG_PAGES ; i++)
free_page(page[i]);
return(retval);
}

@ -0,0 +1,492 @@
/*
* linux/mm/memory.c
*
* (C) 1991 Linus Torvalds
*/
/*
* demand-loading started 01.12.91 - seems it is high on the list of
* things wanted, and it should be easy to implement. - Linus
*/
/*
* Ok, demand-loading was easy, shared pages a little bit tricker. Shared
* pages started 02.12.91, seems to work. - Linus.
*
* Tested sharing by executing about 30 /bin/sh: under the old kernel it
* would have taken more than the 6M I have free, but it worked well as
* far as I could see.
*
* Also corrected some "invalidate()"s - I wasn't doing enough of them.
*/
#include <signal.h>
#include <asm/system.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
volatile void do_exit(long code);
static inline volatile void oom(void)
{
printk("out of memory\n\r");
do_exit(SIGSEGV);
}
#define invalidate() \
__asm__("movl %%eax,%%cr3"::"a" (0))
/* these are not to be changed without changing head.s etc */
#define LOW_MEM 0x100000
#define PAGING_MEMORY (15*1024*1024)
#define PAGING_PAGES (PAGING_MEMORY>>12)
#define MAP_NR(addr) (((addr)-LOW_MEM)>>12)
#define USED 100
#define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \
current->start_code + current->end_code)
static long HIGH_MEMORY = 0;
#define copy_page(from,to) \
__asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024))
static unsigned char mem_map [ PAGING_PAGES ] = {0,};
/*
* Get physical address of first (actually last :-) free page, and mark it
* used. If no free pages left, return 0.
*/
unsigned long get_free_page(void)
{
register unsigned long __res asm("ax");
__asm__("std ; repne ; scasb\n\t"
"jne 1f\n\t"
"movb $1,1(%%edi)\n\t"
"sall $12,%%ecx\n\t"
"addl %2,%%ecx\n\t"
"movl %%ecx,%%edx\n\t"
"movl $1024,%%ecx\n\t"
"leal 4092(%%edx),%%edi\n\t"
"rep ; stosl\n\t"
"movl %%edx,%%eax\n\t"
"1:"
"cld\n\t" /* by wyj */
:"=a" (__res)
:"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES),
"D" (mem_map+PAGING_PAGES-1)
);
return __res;
}
/*
* Free a page of memory at physical address 'addr'. Used by
* 'free_page_tables()'
*/
void free_page(unsigned long addr)
{
if (addr < LOW_MEM) return;
if (addr >= HIGH_MEMORY)
panic("trying to free nonexistent page");
addr -= LOW_MEM;
addr >>= 12;
if (mem_map[addr]--) return;
mem_map[addr]=0;
panic("trying to free free page");
}
/*
* This function frees a continuos block of page tables, as needed
* by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks.
*/
int free_page_tables(unsigned long from,unsigned long size)
{
unsigned long *pg_table;
unsigned long * dir, nr;
if (from & 0x3fffff)
panic("free_page_tables called with wrong alignment");
if (!from)
panic("Trying to free up swapper memory space");
size = (size + 0x3fffff) >> 22;
dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
for ( ; size-->0 ; dir++) {
if (!(1 & *dir))
continue;
pg_table = (unsigned long *) (0xfffff000 & *dir);
for (nr=0 ; nr<1024 ; nr++) {
if (1 & *pg_table)
free_page(0xfffff000 & *pg_table);
*pg_table = 0;
pg_table++;
}
free_page(0xfffff000 & *dir);
*dir = 0;
}
invalidate();
return 0;
}
/*
* Well, here is one of the most complicated functions in mm. It
* copies a range of linerar addresses by copying only the pages.
* Let's hope this is bug-free, 'cause this one I don't want to debug :-)
*
* Note! We don't copy just any chunks of memory - addresses have to
* be divisible by 4Mb (one page-directory entry), as this makes the
* function easier. It's used only by fork anyway.
*
* NOTE 2!! When from==0 we are copying kernel space for the first
* fork(). Then we DONT want to copy a full page-directory entry, as
* that would lead to some serious memory waste - we just copy the
* first 160 pages - 640kB. Even that is more than we need, but it
* doesn't take any more memory - we don't copy-on-write in the low
* 1 Mb-range, so the pages can be shared with the kernel. Thus the
* special case for nr=xxxx.
*/
int copy_page_tables(unsigned long from,unsigned long to,long size)
{
unsigned long * from_page_table;
unsigned long * to_page_table;
unsigned long this_page;
unsigned long * from_dir, * to_dir;
unsigned long nr;
if ((from&0x3fffff) || (to&0x3fffff))
panic("copy_page_tables called with wrong alignment");
from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
to_dir = (unsigned long *) ((to>>20) & 0xffc);
size = ((unsigned) (size+0x3fffff)) >> 22;
for( ; size-->0 ; from_dir++,to_dir++) {
if (1 & *to_dir)
panic("copy_page_tables: already exist");
if (!(1 & *from_dir))
continue;
from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
if (!(to_page_table = (unsigned long *) get_free_page()))
return -1; /* Out of memory, see freeing */
*to_dir = ((unsigned long) to_page_table) | 7;
nr = (from==0)?0xA0:1024;
for ( ; nr-- > 0 ; from_page_table++,to_page_table++) {
this_page = *from_page_table;
if (!(1 & this_page))
continue;
this_page &= ~2;
*to_page_table = this_page;
if (this_page > LOW_MEM) {
*from_page_table = this_page;
this_page -= LOW_MEM;
this_page >>= 12;
mem_map[this_page]++;
}
}
}
invalidate();
return 0;
}
/*
* This function puts a page in memory at the wanted address.
* It returns the physical address of the page gotten, 0 if
* out of memory (either when trying to access page-table or
* page.)
*/
unsigned long put_page(unsigned long page,unsigned long address)
{
unsigned long tmp, *page_table;
/* NOTE !!! This uses the fact that _pg_dir=0 */
if (page < LOW_MEM || page >= HIGH_MEMORY)
printk("Trying to put page %p at %p\n",page,address);
if (mem_map[(page-LOW_MEM)>>12] != 1)
printk("mem_map disagrees with %p at %p\n",page,address);
page_table = (unsigned long *) ((address>>20) & 0xffc);
if ((*page_table)&1)
page_table = (unsigned long *) (0xfffff000 & *page_table);
else {
if (!(tmp=get_free_page()))
return 0;
*page_table = tmp|7;
page_table = (unsigned long *) tmp;
}
page_table[(address>>12) & 0x3ff] = page | 7;
/* no need for invalidate */
return page;
}
void un_wp_page(unsigned long * table_entry)
{
unsigned long old_page,new_page;
old_page = 0xfffff000 & *table_entry;
if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
*table_entry |= 2;
invalidate();
return;
}
if (!(new_page=get_free_page()))
oom();
if (old_page >= LOW_MEM)
mem_map[MAP_NR(old_page)]--;
*table_entry = new_page | 7;
invalidate();
copy_page(old_page,new_page);
}
/*
* This routine handles present pages, when users try to write
* to a shared page. It is done by copying the page to a new address
* and decrementing the shared-page counter for the old page.
*
* If it's in code space we exit with a segment error.
*/
void do_wp_page(unsigned long error_code,unsigned long address)
{
#if 0
/* we cannot do this yet: the estdio library writes to code space */
/* stupid, stupid. I really want the libc.a from GNU */
if (CODE_SPACE(address))
do_exit(SIGSEGV);
#endif
un_wp_page((unsigned long *)
(((address>>10) & 0xffc) + (0xfffff000 &
*((unsigned long *) ((address>>20) &0xffc)))));
}
void write_verify(unsigned long address)
{
unsigned long page;
if (!( (page = *((unsigned long *) ((address>>20) & 0xffc)) )&1))
return;
page &= 0xfffff000;
page += ((address>>10) & 0xffc);
if ((3 & *(unsigned long *) page) == 1) /* non-writeable, present */
un_wp_page((unsigned long *) page);
return;
}
void get_empty_page(unsigned long address)
{
unsigned long tmp;
if (!(tmp=get_free_page()) || !put_page(tmp,address)) {
free_page(tmp); /* 0 is ok - ignored */
oom();
}
}
/*
* try_to_share() checks the page at address "address" in the task "p",
* to see if it exists, and if it is clean. If so, share it with the current
* task.
*
* NOTE! This assumes we have checked that p != current, and that they
* share the same executable.
*/
static int try_to_share(unsigned long address, struct task_struct * p)
{
unsigned long from;
unsigned long to;
unsigned long from_page;
unsigned long to_page;
unsigned long phys_addr;
from_page = to_page = ((address>>20) & 0xffc);
from_page += ((p->start_code>>20) & 0xffc);
to_page += ((current->start_code>>20) & 0xffc);
/* is there a page-directory at from? */
from = *(unsigned long *) from_page;
if (!(from & 1))
return 0;
from &= 0xfffff000;
from_page = from + ((address>>10) & 0xffc);
phys_addr = *(unsigned long *) from_page;
/* is the page clean and present? */
if ((phys_addr & 0x41) != 0x01)
return 0;
phys_addr &= 0xfffff000;
if (phys_addr >= HIGH_MEMORY || phys_addr < LOW_MEM)
return 0;
to = *(unsigned long *) to_page;
if (!(to & 1))
if (to = get_free_page())
*(unsigned long *) to_page = to | 7;
else
oom();
to &= 0xfffff000;
to_page = to + ((address>>10) & 0xffc);
if (1 & *(unsigned long *) to_page)
panic("try_to_share: to_page already exists");
/* share them: write-protect */
*(unsigned long *) from_page &= ~2;
*(unsigned long *) to_page = *(unsigned long *) from_page;
invalidate();
phys_addr -= LOW_MEM;
phys_addr >>= 12;
mem_map[phys_addr]++;
return 1;
}
/*
* share_page() tries to find a process that could share a page with
* the current one. Address is the address of the wanted page relative
* to the current data space.
*
* We first check if it is at all feasible by checking executable->i_count.
* It should be >1 if there are other tasks sharing this inode.
*/
static int share_page(unsigned long address)
{
struct task_struct ** p;
if (!current->executable)
return 0;
if (current->executable->i_count < 2)
return 0;
for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
if (!*p)
continue;
if (current == *p)
continue;
if ((*p)->executable != current->executable)
continue;
if (try_to_share(address,*p))
return 1;
}
return 0;
}
/*
do_no_page()线
线
线+1
线
*/
void do_no_page(unsigned long error_code,unsigned long address)
{
int nr[4];
unsigned long tmp;
unsigned long page;
int block,i;
address &= 0xfffff000; // address 处缺页页面地址。
tmp = address - current->start_code; // 缺页页面对应逻辑地址。
if (!current->executable || tmp >= current->end_data) {
get_empty_page(address);
return;
}
if (share_page(tmp)) // 尝试逻辑地址 tmp 处页面的共享。
return;
if (!(page = get_free_page())) // 申请一页物理内存
oom();
/* remember that 1 block is used for header */
block = 1 + tmp/BLOCK_SIZE;
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(current->executable,block);
bread_page(page,current->executable->i_dev,nr);
i = tmp + 4096 - current->end_data;
tmp = page + 4096;
while (i-- > 0) {
tmp--;
*(char *)tmp = 0;
}
if (put_page(page,address))
return;
free_page(page);
oom();
}
/* for exec.c/do_execve2 */
/* coded by dj */
void my_do_no_page(unsigned long address)
{
int nr[4];
unsigned long tmp;
unsigned long page;
int block,i;
address &= 0xfffff000; // address 处缺页页面地址。
tmp = address - current->start_code; // 缺页页面对应逻辑地址。
/* 
executable +
线executable i 0
线
get_empty_page()
线
*/
if (!current->executable || tmp >= current->end_data) {
get_empty_page(address);
return;
}
if (share_page(tmp)) // 尝试逻辑地址 tmp 处页面的共享。
return;
if (!(page = get_free_page())) // 申请一页物理内存
oom();
/* remember that 1 block is used for header */
/* 记住,(程序)头要使用 1 个数据块 */
// 根据这个块号和执行文件的 i 节点,我们就可以从映射位图中找到对应块设备中对应的设备
// 逻辑块号(保存在 nr[]数组中)。利用 bread_page()即可把这 4 个逻辑块读入到物理页面
// page 中。
block = 1 + tmp/BLOCK_SIZE;
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(current->executable,block);
bread_page(page,current->executable->i_dev,nr);
i = tmp + 4096 - current->end_data; // 超出的字节长度值。
tmp = page + 4096; // tmp 指向页面末端。
while (i-- > 0) { // 页面末端 i 字节清零。
tmp--;
*(char *)tmp = 0;
}
// 最后把引起缺页异常的一页物理页面映射到指定线性地址 address 处。若操作成功就返回,
// 否则就释放内存页,显示内存不够。
if (put_page(page,address))
return;
free_page(page);
oom();
}
void mem_init(long start_mem, long end_mem)
{
int i;
HIGH_MEMORY = end_mem;
for (i=0 ; i<PAGING_PAGES ; i++)
mem_map[i] = USED;
i = MAP_NR(start_mem);
end_mem -= start_mem;
end_mem >>= 12;
while (end_mem-->0)
mem_map[i++]=0;
}
void calc_mem(void)
{
int i,j,k,free=0;
long * pg_tbl;
for(i=0 ; i<PAGING_PAGES ; i++)
if (!mem_map[i]) free++;
printk("%d pages free (of %d)\n\r",free,PAGING_PAGES);
for(i=2 ; i<1024 ; i++) {
if (1&pg_dir[i]) {
pg_tbl=(long *) (0xfffff000 & pg_dir[i]);
for(j=k=0 ; j<1024 ; j++)
if (pg_tbl[j]&1)
k++;
printk("Pg-dir[%d] uses %d pages\n",i,k);
}
}
}
Loading…
Cancel
Save