[MIT6.828] LAB4 PART A

Exercise 1.Implement round-robin scheduling in sched_yield() as described above. Don’t forget to modify syscall() to dispatch sys_yield().

修改kern/sched.c文件的sched_yield()函数

// Choose a user environment to run and run it.
void
sched_yield(void)
{
	int i;
	//find next env;
	struct Env *e = (curenv==NULL || curenv>=envs+NENV-1) ? envs+1: curenv+1;
	//find next runnable env;
	for(i=1; i<NENV; i++)
	{
		if (e->env_status == ENV_RUNNABLE)
		{//got it
			env_run(e);
		}
		// next env
		e = (e == envs+NENV-1) ? envs+1:e+1;
	}
	// Run the special idle environment when nothing else is runnable.
	if (envs[0].env_status == ENV_RUNNABLE)
		env_run(&envs[0]);
	else {
		cprintf("Destroyed all environments - nothing more to do!/n");
		while (1)
			monitor(NULL);
	}
}

注意:
1、注意起始env的选择一定是当前env的下一个,如果没有env或者env到了最后就从第1个env开始,第0个env作为idle使用的。
2、循环次数,由于不遍历第0个env,所以总共最多循环NENV-1次,这时候整好循环到当前env(如果有的话)

Exercise 2. Implement the system calls described above (sys_exofork, sys_env_set_status, sys_page_alloc, sys_page_map, sys_page_unmap) in kern/syscall.c. You will need to use various functions in kern/pmap.c and kern/env.c, particularly envid2env(). For now, whenever you call envid2env(), pass 1 in the checkperm parameter. Be sure you check for any invalid system call arguments, returning -E_INVAL in that case. Test your JOS kernel with user/dumbfork and make sure it works before proceeding.

修改kern/syscall.c中如下函数

// Allocate a new environment.
// Returns envid of new environment, or < 0 on error.  Errors are:
//	-E_NO_FREE_ENV if no free environment is available.
//	-E_NO_MEM on memory exhaustion.
static envid_t
sys_exofork(void)
{
	// Create the new environment with env_alloc(), from kern/env.c.
	// It should be left as env_alloc created it, except that
	// status is set to ENV_NOT_RUNNABLE, and the register set is copied
	// from the current environment -- but tweaked so sys_exofork
	// will appear to return 0.
	envid_t ret;
	struct Env *e;
	ret =  env_alloc(&e, curenv->env_id);
	if (ret < 0)
		return ret;
	e->env_status == ENV_NOT_RUNNABLE;
	e->env_tf = curenv->env_tf;
	e->env_tf.tf_regs.reg_eax = 0;
	return e->env_id;
}

注意返回值的问题,子进程要返回0,所以要把0放到eax中,子进程开始运行的时候从内核返回到用户层就是把eax作为返回值。

 

 

// Set envid's env_status to status, which must be ENV_RUNNABLE
// or ENV_NOT_RUNNABLE.
//
// Returns 0 on success, < 0 on error.  Errors are:
//	-E_BAD_ENV if environment envid doesn't currently exist,
//		or the caller doesn't have permission to change envid.
//	-E_INVAL if status is not a valid status for an environment.
static int
sys_env_set_status(envid_t envid, int status)
{
	// Hint: Use the 'envid2env' function from kern/env.c to translate an
	// envid to a struct Env.
	// You should set envid2env's third argument to 1, which will
	// check whether the current environment has permission to set
	// envid's status.

	struct Env *e;
	if (status != ENV_RUNNABLE && status != ENV_NOT_RUNNABLE)
		return -E_INVAL;
	if (envid2env(envid, &e, 1) <0)
		return -E_BAD_ENV;
	e->env_status = status;
	return 0;
}

注意status的值要在限定的范围内。

 

// Allocate a page of memory and map it at 'va' with permission
// 'perm' in the address space of 'envid'.
// The page's contents are set to 0.
// If a page is already mapped at 'va', that page is unmapped as a
// side effect.
//
// perm -- PTE_U | PTE_P must be set, PTE_AVAIL | PTE_W may or may not be set,
//         but no other bits may be set.  See PTE_USER in inc/mmu.h.
//
// Return 0 on success, < 0 on error.  Errors are:
//	-E_BAD_ENV if environment envid doesn't currently exist,
//		or the caller doesn't have permission to change envid.
//	-E_INVAL if va >= UTOP, or va is not page-aligned.
//	-E_INVAL if perm is inappropriate (see above).
//	-E_NO_MEM if there's no memory to allocate the new page,
//		or to allocate any necessary page tables.
static int
sys_page_alloc(envid_t envid, void *va, int perm)
{
	// Hint: This function is a wrapper around page_alloc() and
	//   page_insert() from kern/pmap.c.
	//   Most of the new code you write should be to check the
	//   parameters for correctness.
	//   If page_insert() fails, remember to free the page you
	//   allocated!
	struct Env *e;
	struct Page *page;
	//check virtual address & perm
	if ( (uint32_t)va >= UTOP || PGOFF(va) != 0 || (perm&5) != 5 || (perm & (~PTE_USER)) != 0 )
		return -E_INVAL;
	// get environment id
	if (envid2env(envid, &e, 1) <0)
		return -E_BAD_ENV;
	//alloc physical page
	if (page_alloc(&page) <0)
		return -E_NO_MEM;
	//insert to page tables
	if (page_insert(e->env_pgdir, page, va, perm) <0)
	{
		page_free(page);
		return -E_NO_MEM;
	}
	//fill with 0
	memset(page2kva(page), 0, PGSIZE);
	return 0;
}

注意perm的权限,有些一定要置位,有些一定不能有。

// Map the page of memory at 'srcva' in srcenvid's address space
// at 'dstva' in dstenvid's address space with permission 'perm'.
// Perm has the same restrictions as in sys_page_alloc, except
// that it also must not grant write access to a read-only
// page.
//
// Return 0 on success, < 0 on error.  Errors are:
//	-E_BAD_ENV if srcenvid and/or dstenvid doesn't currently exist,
//		or the caller doesn't have permission to change one of them.
//	-E_INVAL if srcva >= UTOP or srcva is not page-aligned,
//		or dstva >= UTOP or dstva is not page-aligned.
//	-E_INVAL is srcva is not mapped in srcenvid's address space.
//	-E_INVAL if perm is inappropriate (see sys_page_alloc).
//	-E_INVAL if (perm & PTE_W), but srcva is read-only in srcenvid's
//		address space.
//	-E_NO_MEM if there's no memory to allocate any necessary page tables.
static int
sys_page_map(envid_t srcenvid, void *srcva,
	     envid_t dstenvid, void *dstva, int perm)
{
	// Hint: This function is a wrapper around page_lookup() and
	//   page_insert() from kern/pmap.c.
	//   Again, most of the new code you write should be to check the
	//   parameters for correctness.
	//   Use the third argument to page_lookup() to
	//   check the current permissions on the page.
	struct Env *srce, *dste;
	pte_t *ppte;
	struct Page *page;
	//check env_id
	if ((envid2env(srcenvid, &srce, 1)<0) || (envid2env(dstenvid, &dste, 1)<0))
		return -E_BAD_ENV;
	//check vitrual address range and alignment
	if (((uint32_t)srcva >= UTOP) || (PGOFF(srcva) != 0) ||
		((uint32_t)dstva >= UTOP) || (PGOFF(dstva) != 0))
		return -E_INVAL;
	//check srcva is mapped
	if ((page = page_lookup(srce->env_pgdir, srcva, &ppte)) == NULL)
		return -E_INVAL;
	//check perm
	if ( ((perm&5) != 5) || ((perm & (~PTE_USER)) != 0) ||
		((perm & PTE_W)&& !ISPTE_W(*ppte)) )
		return -E_INVAL;
	//insert page to destination
	if (page_insert(dste->env_pgdir, page, dstva, perm) <0)
		return -E_NO_MEM;
	return 0;
}

主要还是传入参数的安全性/合法性检测

 

// Unmap the page of memory at 'va' in the address space of 'envid'.
// If no page is mapped, the function silently succeeds.
//
// Return 0 on success, < 0 on error.  Errors are:
//	-E_BAD_ENV if environment envid doesn't currently exist,
//		or the caller doesn't have permission to change envid.
//	-E_INVAL if va >= UTOP, or va is not page-aligned.
static int
sys_page_unmap(envid_t envid, void *va)
{
	// Hint: This function is a wrapper around page_remove().
	struct Env *e;
	if (envid2env(envid, &e, 1) <0)
		return -E_BAD_ENV;
	if ((uint32_t)va >= UTOP || PGOFF(va) != 0)
		return -E_INVAL;
	page_remove(e->env_pgdir, va);
	return 0;
}

一个page_remove就把活儿轻松搞定

最后还需要注意的是,在syscal()函数里,也要把对应的系统调用安排好。

Challenge 1 Add a less trivial scheduling policy to the kernel, such as a fixed-priority scheduler that allows each environment to be assigned a priority and ensures that higher-priority environments are always chosen in preference to lower-priority environments. If you’re feeling really adventurous, try implementing a Unix-style adjustable-priority scheduler or even a lottery or stride scheduler. (Look up "lottery scheduling" and "stride scheduling" in Google.)

Write a test program or two that verifies that your scheduling algorithm is working correctly (i.e., the right environments get run in the right order). It may be easier to write these test programs once you have implemented fork() and IPC in parts B and C of this lab.

给进程调度加入优先级。这个问题实现起来是很轻松的,在内核开辟一块进程调度队列即可。不过原本代码根本没有考虑优先级,所以调度部分sched_yield()要重写,各种进程状态改变的部分也要加入优先级部分,写起来比较麻烦。但是简单的可以用数组来实现,这样每次进程切换的时候都会遍历一次进程数组,这个简单的实现可以参考XV6的进程优先级实现部分。下面说的是进程队列的实现
1、进程结构体要加入优先级成员,并新建一个进程list类型,或者直接用数组。
2、在内核建立进程队列。
3、在进程创建的时候给与初始优先级。
4、在进程切换的时候,当前进程优先级变为初始值入队,其余进程优先级递增(可加权递增),在进程队列中选择最大的一个运行。
5、进程在变为非runnable(阻塞)的时候要从进程队列删除,恢复的时候加入,为了提高程序的响应速度,可以在恢复的时候增加优先级。

这个模型的优点是实现方便,操作灵活,在并发不多的环境下性能也不错。

Challenge 2 The JOS kernel currently does not allow applications to use the x86 processor’s x87 floating-point unit (FPU), MMX instructions, or Streaming SIMD Extensions (SSE). Extend the Env structure to provide a save area for the processor’s floating point state, and extend the context switching code to save and restore this state properly when switching from one environment to another. The FXSAVE and FXRSTOR instructions may be useful, but note that these are not in the old i386 user’s manual because they were introduced in more recent processors. Write a user-level test program that does something cool with floating-point.
给中断加入保存浮点寄存器的功能。
1、给inc/trap.h文件中的Trapframe结构体新增char tf_fpus[512]成员,并新增uint32_t tf_padding0[3]用来对齐。
2、修改kern/trapentry.S文件中的_alltraps函数,加入保存fpu寄存器的功能

//ASM code
_alltraps:
	/*make Trapframe*/
	pushl %ds;
	pushl %es;
	pushal;
	/*save FPU*/

	subl 4, %esp;
	fxsave (%esp);
	/*load kernel data segment*/
	movl $GD_KD, %eax;
	movw %ax, %ds;

	movw %ax, %es;
	/*push trap argument pointer to Trapframe*/
	pushl %esp;
	call trap;
	/*trap fall*/
	addl , %esp;
	fxrstor (%esp);
	addl 4, %esp;
	popal;
	popl %es;
	popl %ds;
	addl , %esp;
	iret

3、修改kern/env.c文件env_pop_tf()函数,加入恢复fpu功能

void
env_pop_tf(struct Trapframe *tf)
{
	__asm __volatile("movl %0,%%esp/n"
		"/tfxrstor (%%esp)/n" /*restore fpu*/
		"/taddl 4, %%esp/n"
		"/tpopal/n"
		"/tpopl %%es/n"
		"/tpopl %%ds/n"
		"/taddl void
env_pop_tf(struct Trapframe *tf)
{
	__asm __volatile("movl %0,%%esp/n"
		"/tfxrstor (%%esp)/n" /*restore fpu*/
		"/taddl 4, %%esp/n"
		"/tpopal/n"
		"/tpopl %%es/n"
		"/tpopl %%ds/n"
		"/taddl void
env_pop_tf(struct Trapframe *tf)
{
	__asm __volatile("movl %0,%%esp/n"
		"/tfxrstor (%%esp)/n" /*restore fpu*/
		"/taddl 4, %%esp/n"
		"/tpopal/n"
		"/tpopl %%es/n"
		"/tpopl %%ds/n"
		"/taddl void
env_pop_tf(struct Trapframe *tf)
{
	__asm __volatile("movl %0,%%esp/n"
		"/tfxrstor (%%esp)/n" /*restore fpu*/
		"/taddl $524, %%esp/n"
		"/tpopal/n"
		"/tpopl %%es/n"
		"/tpopl %%ds/n"
		"/taddl $0x8, %%esp/n" /* skip tf_trapno and tf_errcode */
		"/tiret"
		: : "g" (tf) : "memory");
	panic("iret failed");  /* mostly to placate the compiler */
}x8, %%esp/n" /* skip tf_trapno and tf_errcode */
		"/tiret"
		: : "g" (tf) : "memory");
	panic("iret failed");  /* mostly to placate the compiler */
}x8, %%esp/n" /* skip tf_trapno and tf_errcode */
		"/tiret"
		: : "g" (tf) : "memory");
	panic("iret failed");  /* mostly to placate the compiler */
}x8, %%esp/n" /* skip tf_trapno and tf_errcode */
		"/tiret"
		: : "g" (tf) : "memory");
	panic("iret failed");  /* mostly to placate the compiler */
}

4、测试用例是修改的user/dumbfork.c的umain()函数。

void
umain(void)
{
	envid_t who;
	int i;
	double	a1=2.0,
		a=getchar();
	//move a to st(0)
 	asm("fldl %0/n/t"::"m"(a));
	who = dumbfork();
	if(who)
	{
		for (i = 0; i < 6; i++)
		{
			//mull
			asm("fmull %0"::"m"(a1));
			sys_yield();
		}
		//move fpu result to a
		asm("fstpl %0":"=m"(a));
		cprintf("parent a=%d/n",(int) a);
	}
	else
	{
		for(i=0; i<6; i++)
		{
			asm("fmull %0"::"m"(a1));
			sys_yield();
		}
		asm("fstpl %0":"=m"(a));
		cprintf ("child a=%d/n",(int)a);
	}
}

注意
1、利用父子进程不停的切换来测试浮点寄存器有没有被保存。
2、gcc 4.5.1 x86版本 -03优化好像对于浮点运算提供了保护功能,如果运算后有函数调用会浮点数保存回去(或者是没有达到优化条件?),这样即使函数内部使用了浮点寄存器也不会影响到外面。所以只好用嵌入汇编测试了,不知道有没有编译参数可以调整。当我把上面循环手工展开,把sys_yield()改成直接inline到系统调用时,只用C代码也可以达到测试的效果。

Challenge 3 Add the additional system calls necessary to read all of the vital state of an existing environment as well as set it up. Then implement a user mode program that forks off a child environment, runs it for a while (e.g., a few iterations of sys_yield()), then takes a complete snapshot or checkpoint of the child environment, runs the child for a while longer, and finally restores the child environment to the state it was in at the checkpoint and continues it from there. Thus, you are effectively "replaying" the execution of the child environment from an intermediate state. Make the child environment perform some interaction with the user using sys_cgetc() or readline() so that the user can view and mutate its internal state, and verify that with your checkpoint/restart you can give the child environment a case of selective amnesia, making it "forget" everything that happened beyond a certain point.

增加系统调用,提供进程快照的功能。要提供完整的快照功能,除了要保存进程的全部数据,如果考虑到进程不掉用exec来替换代码文本外,需要保存以下数据
1、进程全局变量
2、进程用户栈
3、进程环境变量Env结构体
内核栈数据不需要保存,因为没有内核抢占功能,内核栈是共用的,如果想保存全局变量的话需要修改载入程序文件处的函数用来保存一些信息,而且不同的全局变量所在段大小是不同的,在用户空间分配也比较麻烦,所以这里暂时只保存用户栈和Env结构体,也可以实现简单的进程快照功能。具体实现如下:
1、在inc/env.h中声明进程状态的结构体

struct proc_status {
        struct Env env;
        char stack[PGSIZE];
};

2、修改inc/syscall.h中的enmu,增加sys_proc_sav和sys_proc_rst两个调用号

3、在kern/syscall.c中新增两个函数sys_proc_save和sys_proc_restore分别用来保存和恢复进程状态,并且修改syscal()函数,使得这两个函数处理新增的两个调用号

//save one process status
static int
sys_proc_save(envid_t envid, struct proc_status *ps)
{
	struct Env *olde;
	struct Page *pg;
	int offset;
	//save env
	if (envid2env(envid, &olde, 1) <0)
		return -E_BAD_ENV;
	if (user_mem_check(curenv, ps, sizeof(struct proc_status), PTE_U|PTE_W|PTE_P) <0)
		return -E_FAULT;
	ps->env = *olde;
	//save stack
	if ((pg=page_lookup(olde->env_pgdir, (void *)(USTACKTOP-PGSIZE), NULL))==NULL)
		return -E_FAULT;
	offset = olde->env_tf.tf_esp+PGSIZE-USTACKTOP;
	memmove(ps->stack, page2kva(pg), PGSIZE);
	cprintf("process %x has been saved/n", envid);
	return 0;
}
//restore one process
static int
sys_proc_restore(envid_t envid, const struct proc_status *ps)
{
	struct Env *olde;
	struct Page *pg;
	int offset;
	//restore env
	if (envid2env(envid, &olde, 1) <0)
		return -E_BAD_ENV;
	if (user_mem_check(curenv, ps, sizeof(struct proc_status), PTE_U|PTE_P) <0)
		return -E_FAULT;
	//print_trapframe(&(olde->env_tf));
	*olde = ps->env;
	//restore stack
	if ((pg=page_lookup(olde->env_pgdir, (void *)(USTACKTOP-PGSIZE), NULL))==NULL)
		return -E_FAULT;
	offset = olde->env_tf.tf_esp+PGSIZE-USTACKTOP;
	memmove(page2kva(pg), ps->stack, PGSIZE);
	cprintf("process %x has been restored/n",envid);
	return 0;
}

4、修改inc/lib.h文件,声明两个系统调用的用户接口

int sys_proc_save(envid_t envid, struct proc_status *ps);
int sys_proc_restore(envid_t envid, const struct proc_status *ps);

5、修改lib/syscall.c文件,实现两个系统调用的用户接口

int
sys_proc_save(envid_t envid, struct proc_status *ps)
{
	return syscall(SYS_proc_sav, 1, envid, (uint32_t)ps, 0, 0, 0);
}
int
sys_proc_restore(envid_t envid, const struct proc_status *ps)
{
	return syscall(SYS_proc_rst, 1, envid, (uint32_t)ps, 0, 0, 0);
}

6、在user/dumbfork.c的基础上修改umain()函数用来测试。

struct proc_status ps;
void
umain(void)
{
	envid_t who;
	int i,j,r;
	// fork a child process
	who = dumbfork();
	// print a message and yield to the other a few times
	if(who)
	{//parent
		for (i = 0; i < 10; i++)
		{

			cprintf("step0:%d I am the parent!/n",i );
			if (i==1)
			{
				if ((r=sys_proc_save(who, &ps)) <0)
					panic("sys_env_save: %e", r);
			}
			else if (i==8)
			{
				if ((r=sys_proc_restore(who, &ps))<0)
					panic("sys_env_restore: %e", r);;
			}

			sys_yield();
		}
	}
	else
	{//child
		for(i=0; i<6; i++)
		{
			cprintf("step1:%d I am the chlid/n",i);
			sys_yield();
		}
		for (j=0; j<6; j++)
		{
			cprintf("step2:%d I am the chlid/n",j);
			sys_yield();
		}
	}
}

注意:
1、把进程快照保存在用户空间的好处就是父进程可以非常方便的修改用户状态,不好之处就是风险实在太大。也可以把真实数据放入内核,外界用ID代替,就像envid和filedesc一样。
2、这里把进程快照变量定义成了全局变量,不能放在栈上,因为它还要保存的栈数据呢,这样一来就超过栈的大小了。其实也可以实现成在内核分配正好大小的数据返回指针给用户空间,不过由于目前内核对用户空间的内存管理还不够方便(还没实现堆管理),所以就没这么做。

Question 1. In your implementation of env_run() you should have called lcr3(). Before and after the call to lcr3(), your code makes references (at least it should) to the variable e, the argument to env_run. Upon loading the %cr3 register, the addressing context used by the MMU is instantly changed. But a virtual address (namely e) has meaning relative to a given address context–the address context specifies the physical address to which the virtual address maps. Why can the pointer e be dereferenced both before and after the addressing switch?

因为无论是e变量(保存在内核栈上)还是e指向的环境结构体变量(保存在内核数据段)其页映射关系在每个进程的cr3对应的页目录中都是一致的,所以切换cr3不会引发此方面的异常。

This entry was posted in 操作系统 and tagged . Bookmark the permalink.

6 Responses to [MIT6.828] LAB4 PART A

  1. snail says:

    D大这是在研究什么?Linux内核?

  2. lxblghds says:

    sched_yield()中似乎没有考虑curenv=envs(NENV-1)的情况…..
    求part3啊 ~~~~[e01]

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注