ZYB ARTICLES REPOS

MINIX3分析二

Minix的入口

Minix3支持multiboot的引导协议

其入口逻辑代码,主要是按multiboot协议定义头部,kernel/arch/i386/kernel.lds里指定的入口为__k_unpaged_MINIX,即这里的MINIX。进入MINIX后第一条jmp multiboot_init指令,跳过multiboot的头,直接来到真正的初始化代码。

.text
/*===========================================================================*/
/*				MINIX				     */
/*===========================================================================*/
/*
 * Kernel entry stub. Execution starts at MINIX, jumps over the multiboot
 * header data that follows it, sets up a temporary stack, calls pre_init()
 * with the two values the boot loader left in %eax and %ebx, and finally
 * switches to the kernel stack and calls kmain() with pre_init()'s result.
 */
.global MINIX
MINIX:
/* this is the entry point for the MINIX kernel */
	jmp multiboot_init	/* skip the header data; it is not code */

/* Multiboot header here*/

.balign 8

#define MULTIBOOT_FLAGS (MULTIBOOT_HEADER_WANT_MEMORY | MULTIBOOT_HEADER_MODS_ALIGNED)

multiboot_magic:
	.long MULTIBOOT_HEADER_MAGIC
multiboot_flags:
	.long MULTIBOOT_FLAGS
multiboot_checksum:
	/* chosen so that magic + flags + checksum wraps to zero */
	.long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_FLAGS)
	/* Five zero words. NOTE(review): presumably the optional address
	 * fields of the multiboot header, left unused - confirm against
	 * the Multiboot specification.
	 */
	.long 0
	.long 0
	.long 0
	.long 0
	.long 0
/* Video mode */
multiboot_mode_type:
	.long MULTIBOOT_VIDEO_MODE_EGA
multiboot_width:
	.long MULTIBOOT_CONSOLE_COLS
multiboot_height:
	.long MULTIBOOT_CONSOLE_LINES
multiboot_depth:
	.long 0

multiboot_init:
	mov	$load_stack_start, %esp	/* make usable stack */
	mov	$0, %ebp
	push	$0	/* set flags to known good state */
	popf	/* esp, clear nested task and int enable */
	/* NOTE(review): extra zero word pushed below the two arguments;
	 * its purpose is not stated here.
	 */
	push	$0

	/* Arguments are pushed right to left, so pre_init() receives
	 * (%eax, %ebx) as its (first, second) parameter.
	 */
        push    %ebx	/* multiboot information struct */
	push	%eax	/* multiboot magic number */
        call    _C_LABEL(pre_init)

	/* Kernel is mapped high now and ready to go, with
	 * the boot info pointer returnd in %eax. Set the
	 * highly mapped stack, initialize it, push the boot
	 * info pointer and jump to the highly mapped kernel.
	 */
	mov	$k_initial_stktop, %esp
	push	$0	/* Terminate stack */
	push	%eax	/* sole argument of kmain() */
        call    _C_LABEL(kmain)

	/* not reached */
hang:
	jmp hang

.data
/* 4096-byte temporary stack used until the switch to k_initial_stktop.
 * The label after the space marks its top, which is what %esp is loaded
 * with above.
 */
load_stack:
	.space 4096
load_stack_start:

这段代码,首先初始化了一个4096字节大小的栈,栈顶为load_stack_start,并将ebp和eflags清0。

再向栈上依次压入三个参数, 0、multiboot传来的参数、multiboot的魔数,调用pre_init,接过multiboot的接力棒继续完成初始化逻辑。初始化完后,将栈顶切换到k_initial_stktop,并从kmain进入。

pre_init

先来看pre_init的逻辑,它定义在kernel/arch/i386/pre_init.c

include/minix/param.h

/* Number of processes contained in the system image. */
#define NR_BOOT_PROCS   (NR_TASKS + LAST_SPECIAL_PROC_NR + 1)

#ifdef _MINIX_SYSTEM
/* This is used to obtain system information through SYS_GETINFO. */
#define MAXMEMMAP 40
/*
 * Boot information block. pre_init() fills one of these in (through
 * get_parameters() and the page table setup) and returns its address,
 * which the entry stub then passes on to kmain().
 */
typedef struct kinfo {
        /* Straight multiboot-provided info */
        /* private copy of the boot loader's multiboot info structure */
        multiboot_info_t        mbi;
        /* copy of the boot module descriptors; get_parameters() also
         * appends one extra entry describing the kernel itself
         */
        multiboot_module_t      module_list[MULTIBOOT_MAX_MODS];
        multiboot_memory_map_t  memmap[MAXMEMMAP]; /* free mem list */
        /* pg_identity() maps 4MB pages at or above this address uncached */
        phys_bytes              mem_high_phys;
        /* NOTE(review): presumably the number of memmap[] entries in use;
         * it is reset to 0 before the memory map is walked - confirm
         */
        int                     mmap_size;

        /* Multiboot-derived */
        int                     mods_with_kernel; /* no. of mods incl kernel */
        int                     kern_mod; /* which one is kernel */

        /* Minix stuff, started at bootstrap phase */
        int                     freepde_start;  /* lowest pde unused kernel pde */
        /* NUL-separated name=value entries written by mb_set_param() */
        char                    param_buf[MULTIBOOT_PARAM_BUF_SIZE];

        /* Minix stuff */
        struct kmessages *kmessages;
        int do_serial_debug;    /* system serial output */
        int serial_debug_baud;  /* serial baud rate */
        int minix_panicing;     /* are we panicing? */
        vir_bytes               user_sp; /* where does kernel want stack set */
        vir_bytes               user_end; /* upper proc limit */
        vir_bytes               vir_kern_start; /* kernel addrspace starts */
        /* address and length of the unpaged bootstrap code/data */
        vir_bytes               bootstrap_start, bootstrap_len;
        struct boot_image       boot_procs[NR_BOOT_PROCS];
        int nr_procs;           /* number of user processes */
        int nr_tasks;           /* number of kernel tasks */
        char release[6];        /* kernel release number */
        char version[6];        /* kernel version number */
	int vm_allocated_bytes; /* allocated by kernel to load vm */
	int kernel_allocated_bytes;		/* used by kernel */
	int kernel_allocated_bytes_dynamic;	/* used by kernel (runtime) */
} kinfo_t;
#endif /* _MINIX_SYSTEM */
/*
 * Early, pre-paging initialization called from the assembly entry stub.
 *
 * magic: value the boot loader left in %eax; must be MULTIBOOT_INFO_MAGIC.
 * ebx:   address of the boot loader's multiboot info structure.
 *
 * Returns the address of the filled-in boot info block, which the caller
 * hands to kmain().
 */
kinfo_t *pre_init(u32_t magic, u32_t ebx)
{
	kinfo_t *const boot_info = &kinfo;

	assert(magic == MULTIBOOT_INFO_MAGIC);

	/* Take a private copy of the boot parameters behind ebx; this is
	 * also where we learn whether serial output was requested.
	 */
	get_parameters(ebx, boot_info);

	/* Build the page directory: start from an empty one, add a 1:1
	 * mapping so the code running right now keeps working, then map
	 * the kernel at its final location and switch paging on.
	 */
	pg_clear();
	pg_identity(boot_info);
	boot_info->freepde_start = pg_mapkernel();
	pg_load();
	vm_enable_paging();

	/* The boot info travels on to kmain() via our return value. */
	return boot_info;
}
/*
 * Gather the boot loader's information into the kernel's own boot info
 * block.
 *
 * ebx: address of the multiboot info structure supplied by the boot loader.
 * cbi: boot info block to fill in.
 *
 * In order, the function
 *   - copies the multiboot info structure into cbi->mbi,
 *   - records the kernel/bootstrap address layout from linker symbols,
 *   - parses the boot command line into cbi->param_buf as name=value
 *     entries (tokens without '=' are ignored; name and value are each
 *     limited to BUF - 1 characters),
 *   - copies the boot module list and hands the available memory ranges
 *     to add_memmap(),
 *   - appends the kernel as one extra module, panics if any two modules
 *     overlap, and removes every module's range from the memory map.
 */
void get_parameters(u32_t ebx, kinfo_t *cbi)
{
	multiboot_memory_map_t *mmap;
	multiboot_info_t *mbi = &cbi->mbi;
	int var_i,value_i, m, k;
	char *p;
	extern char _kern_phys_base, _kern_vir_base, _kern_size,
		_kern_unpaged_start, _kern_unpaged_end;
	phys_bytes kernbase = (phys_bytes) &_kern_phys_base,
		kernsize = (phys_bytes) &_kern_size;
#define BUF 1024
	static char cmdline[BUF];

	/* get our own copy of the multiboot info struct and module list */
	memcpy((void *) mbi, (void *) ebx, sizeof(*mbi));

	/* Set various bits of info for the higher-level kernel. */
	cbi->mem_high_phys = 0;
	cbi->user_sp = (vir_bytes) &_kern_vir_base;
	cbi->vir_kern_start = (vir_bytes) &_kern_vir_base;
	cbi->bootstrap_start = (vir_bytes) &_kern_unpaged_start;
	cbi->bootstrap_len = (vir_bytes) &_kern_unpaged_end -
		cbi->bootstrap_start;
	/* NOTE(review): the kinfo definition shown alongside this code names
	 * this field 'kmessages'; confirm which header version is in use.
	 */
	cbi->kmess = &kmess;

	/* set some configurable defaults */
	cbi->do_serial_debug = 0;
	cbi->serial_debug_baud = 115200;

	/* parse boot command line */
	if (mbi->mi_flags & MULTIBOOT_INFO_HAS_CMDLINE) {
		static char var[BUF];
		static char value[BUF];

		/* Override values with cmdline argument */
		memcpy(cmdline, (void *) mbi->mi_cmdline, BUF);
		/* The copy is a fixed BUF bytes regardless of the string's
		 * real length; terminate it so the parser below always
		 * stops inside cmdline[].
		 */
		cmdline[BUF - 1] = 0;
		p = cmdline;
		while (*p) {
			var_i = 0;
			value_i = 0;
			while (*p == ' ') p++;
			if (!*p) break;
			while (*p && *p != '=' && *p != ' ' && var_i < BUF - 1)
				var[var_i++] = *p++ ;
			var[var_i] = 0;
			/* Skip if not name=value. Only step over a real '=':
			 * when the token ends the string, p must stay on the
			 * terminating NUL rather than move one byte past it.
			 */
			if (*p != '=') continue;
			p++;
			while (*p && *p != ' ' && value_i < BUF - 1)
				value[value_i++] = *p++ ;
			value[value_i] = 0;

			mb_set_param(cbi->param_buf, var, value, cbi);
		}
	}

        /* let higher levels know what we are booting on */
        mb_set_param(cbi->param_buf, ARCHVARNAME, (char *)get_board_arch_name(BOARD_ID_INTEL), cbi);
	mb_set_param(cbi->param_buf, BOARDVARNAME,(char *)get_board_name(BOARD_ID_INTEL) , cbi);

	/* move user stack/data down to leave a gap to catch kernel
	 * stack overflow; and to distinguish kernel and user addresses
	 * at a glance (0xf.. vs 0xe..)
	 */
	cbi->user_sp = USR_STACKTOP;
	cbi->user_end = USR_DATATOP;

	/* kernel bytes without bootstrap code/data that is currently
	 * still needed but will be freed after bootstrapping.
	 * NOTE(review): this writes the global kinfo rather than *cbi; the
	 * two are the same object when called from pre_init().
	 */
	kinfo.kernel_allocated_bytes = (phys_bytes) &_kern_size;
	kinfo.kernel_allocated_bytes -= cbi->bootstrap_len;

	assert(!(cbi->bootstrap_start % I386_PAGE_SIZE));
	cbi->bootstrap_len = rounddown(cbi->bootstrap_len, I386_PAGE_SIZE);
	assert(mbi->mi_flags & MULTIBOOT_INFO_HAS_MODS);
	/* strictly less than the maximum: one slot is needed for the kernel */
	assert(mbi->mi_mods_count < MULTIBOOT_MAX_MODS);
	assert(mbi->mi_mods_count > 0);
	memcpy(&cbi->module_list, (void *) mbi->mi_mods_addr,
		mbi->mi_mods_count * sizeof(multiboot_module_t));

	memset(cbi->memmap, 0, sizeof(cbi->memmap));
	/* mem_map has a variable layout */
	if(mbi->mi_flags & MULTIBOOT_INFO_HAS_MMAP) {
		cbi->mmap_size = 0;
		/* Each record states its own size, not counting the size
		 * field itself, hence the step of mm_size + sizeof(mm_size).
		 */
	        for (mmap = (multiboot_memory_map_t *) mbi->mmap_addr;
       	     (unsigned long) mmap < mbi->mmap_addr + mbi->mmap_length;
       	       mmap = (multiboot_memory_map_t *)
		      	((unsigned long) mmap + mmap->mm_size + sizeof(mmap->mm_size))) {
			if(mmap->mm_type != MULTIBOOT_MEMORY_AVAILABLE) continue;
			add_memmap(cbi, mmap->mm_base_addr, mmap->mm_length);
		}
	} else {
		/* No memory map: fall back to the lower/upper memory sizes,
		 * which are multiplied by 1024 to get bytes.
		 */
		assert(mbi->mi_flags & MULTIBOOT_INFO_HAS_MEMORY);
		add_memmap(cbi, 0, mbi->mi_mem_lower*1024);
		add_memmap(cbi, 0x100000, mbi->mi_mem_upper*1024);
	}

	/* Sanity check: the kernel nor any of the modules may overlap
	 * with each other. Pretend the kernel is an extra module for a
	 * second.
	 */
	k = mbi->mi_mods_count;
	assert(k < MULTIBOOT_MAX_MODS);
	cbi->module_list[k].mod_start = kernbase;
	cbi->module_list[k].mod_end = kernbase + kernsize;
	cbi->mods_with_kernel = mbi->mi_mods_count+1;
	cbi->kern_mod = k;

	for(m = 0; m < cbi->mods_with_kernel; m++) {
#if 0
		printf("checking overlap of module %08lx-%08lx\n",
		  cbi->module_list[m].mod_start, cbi->module_list[m].mod_end);
#endif
		if(overlaps(cbi->module_list, cbi->mods_with_kernel, m))
			panic("overlapping boot modules/kernel");
		/* We cut out the bits of memory that we know are
		 * occupied by the kernel and boot modules.
		 */
		cut_memmap(cbi,
			cbi->module_list[m].mod_start,
			cbi->module_list[m].mod_end);
	}
}

Minix3的Boot Options

经常使用的

Parameter Description
cttyline=0 Used for serial debugging; parameter selects serial port.
cttybaud= Sets serial baud rate. 115200 is recommended.
bootopts=-s Enables single-user mode.
no_apic= 0 to turn APIC on, 1 to turn it off. There is also apic_timer_x=. acpi=yes is required with no_apic=0
no_smp= 0 to turn SMP on, 1 to turn it off.
pci_debug=1 Enable PCI driver debug prints.
rootdevname= E.g. c0d0p0s0; Sets the partition from which to load boot components.
watchdog= 1 to turn on the NMI watchdog (kernel lock up detection; also requires no_apic=0 and acpi=yes).

不常用的

Parameter Description
acpi=yes Use the ACPI driver.
ahci=yes Use the AHCI driver instead of the “at_wini” IDE driver (experimental!). Many other ahci= options are available to help debugging; see the code.
ata_no_dma=1 Disable DMA.
ata_pci_debug=1 Enable PCI-related debugging info in at_wini driver. Some other ata= options are available to help debugging; see the code.
atapi_dma=1 Enable ATAPI DMA.
bios_wini=yes Use the BIOS driver instead of the “at_wini” IDE driver.
bios_remap_first=1 Force the boot drive to be remapped as d7; this allows a machine booted from an inserted media (like a faked disk image from CD booting or a USB thumb drive) to retain its normal drive assignation.
console=8003 Enable 50 lines for the text-mode console.
debug_fkeys=1 Enable pressing F1..F10 keys to show information about the system on the console.
disable= Prevent one daemon (optional service) to start; useful when debugging, or if some piece of hardware has problems; disable=inet will prevent the INET network stack to start; disable=lwip,lance will prevent both the LWIP network stack and the LANCE driver to start.
hz=60 Set the system heart beat.
label= Allows the binding of several configurations in a single image.
memory= Describe the installed memory in the computer; useful to override incorrect or faulty detection or to fake a memory-starved machine.
oxpcie= document me!
rootdev= document me! Also bootdev, ramimagedev, and ramsize, described in the monitor(8) manpage.
rs_verbose=1 Enable debugging of Resurrection Server.
skip_boot_config=1 Prevents the service edit operation on the booting processes (??? document me better!)
sticky_alt=1 Changes the behaviour of the Alt key to match Unix practice (Meta key).
verbose=1 Enable debugging while loading and starting kernel; verbose=2 is much more verbose.
vm_sanitychecklevel= Enable debugging of VM Server.
	cbi->user_sp = (vir_bytes) &_kern_vir_base;
	cbi->vir_kern_start = (vir_bytes) &_kern_vir_base;

_kern_vir_base == 0xF0400000,定义在kernel.lds里。内核虚拟地址入口为该地址,栈顶也是该地址。

__k_unpaged__kern_unpaged_start(即C代码里的_kern_unpaged_start,与前面的__k_unpaged_MINIX对应MINIX同理)的地址为0x00400000,在kernel.lds里的定义如下:

	/* Physical address at which the kernel image begins. */
	_kern_phys_base = 0x00400000;

    /* Place the unpaged (bootstrap) sections first, starting at that address. */
    . = _kern_phys_base;
    __k_unpaged__kern_unpaged_start = .;

    /* Only objects named unpaged_*.o contribute here; data and bss each
     * start on a 4096-byte boundary.
     */
    .unpaged_text : { unpaged_*.o(.text) }
    .unpaged_data ALIGN(4096) : { unpaged_*.o(.data .rodata*) }
    .unpaged_bss  ALIGN(4096) : { unpaged_*.o(.bss COMMON) }
    /* End of the unpaged region; end - start is the bootstrap length. */
    __k_unpaged__kern_unpaged_end = .;
	cbi->bootstrap_start = (vir_bytes) &_kern_unpaged_start;
	cbi->bootstrap_len = (vir_bytes) &_kern_unpaged_end -
		cbi->bootstrap_start;

因此这里就是在计算内核开启页映射之前(unpaged)那部分代码的text、data、bss所占的大小。

接下来就是解析传给内核的cmdline字符串,这个字符串只接受 name=value 形式的参数,不同的name=value组之间由空格分隔。根据解析逻辑来看,name=value必须连在一起,'='两边不能有空格,也不支持name或value中含有空格。不是name=value形式的参数会被忽略掉,且name和value的最大长度都是BUF-1,即1023。

	cbi->user_sp = USR_STACKTOP;
	cbi->user_end = USR_DATATOP;

USR_STACKTOP和USR_DATATOP的值都是0xF0000000,与内核的0xF0400000隔了一段距离。

	kinfo.kernel_allocated_bytes = (phys_bytes) &_kern_size;
	kinfo.kernel_allocated_bytes -= cbi->bootstrap_len;

这两行就是排除启动阶段占用的空间,计算出内核实际占用的空间。

/*
 * Store the setting "name=value" in the parameter buffer.
 *
 * bigbuf: buffer of MULTIBOOT_PARAM_BUF_SIZE bytes holding entries of the
 *         form "name=value", each terminated by a NUL; the list as a whole
 *         ends with a second NUL.
 * name:   variable name.
 * value:  variable value.
 * cbi:    boot info block; its serial debug settings are updated when name
 *         is SERVARNAME or SERBAUDVARNAME.
 *
 * An existing entry with the same name is removed first and the new entry
 * is appended at the end of the list.
 *
 * Returns 0 on success, or -1 if the new entry does not fit (an existing
 * entry of that name has already been removed by then).
 */
static int mb_set_param(char *bigbuf, char *name, char *value, kinfo_t *cbi)
{
	char *p = bigbuf;
	char *bufend = bigbuf + MULTIBOOT_PARAM_BUF_SIZE;
	char *q;
	int namelen = strlen(name);
	int valuelen = strlen(value);

	/* Some variables we recognize */
	if(!strcmp(name, SERVARNAME)) { cbi->do_serial_debug = 1; }
	if(!strcmp(name, SERBAUDVARNAME)) { cbi->serial_debug_baud = atoi(value); }

	/* Delete the item if already exists */
	while (*p) {
		if (strncmp(p, name, namelen) == 0 && p[namelen] == '=') {
			/* Slide everything after this entry down over it. */
			q = p;
			while (*q) q++;
			for (q++; q < bufend; q++, p++)
				*p = *q;
			break;
		}
		/* Move to the next entry. The loop leaves p one past this
		 * entry's NUL, which is exactly where the next entry starts;
		 * stepping any further would skip that entry's first
		 * character and make the name comparison above miss it.
		 */
		while (*p++)
			;
	}

	/* Find the list's terminating double NUL, never looking at a byte
	 * beyond the buffer.
	 */
	for (p = bigbuf; p + 1 < bufend && (*p || *(p + 1)); p++)
		;
	/* Keep the NUL that ends the last entry, unless the list is empty. */
	if (p > bigbuf) p++;

	/* Make sure there's enough space for the new parameter */
	if (p + namelen + valuelen + 3 > bufend)
		return -1;

	strcpy(p, name);
	p[namelen] = '=';
	strcpy(p + namelen + 1, value);
	/* entry terminator followed by the end-of-list terminator */
	p[namelen + valuelen + 1] = 0;
	p[namelen + valuelen + 2] = 0;
	return 0;
}

mb_set_param这个函数的作用其实就是把name=value的参数放到bigbuf里。bigbuf里的数据是以\0分隔的,如name1=value1\0name2=value2...,整个列表以连续两个\0结束。如果有两个相同的name,mb_set_param会删除之前的内容,再将新的name=value加进bigbuf。

接下来的MULTIBOOT_INFO_HAS_MMAP和add_memmap相关的代码,处理的是multiboot传给内核的内存信息:内存可能被划分成若干区间,有的区间能用,有的不能用。类型为MULTIBOOT_MEMORY_AVAILABLE的内存区间就通过add_memmap加到kinfo.memmap数组里。

最后是模块相关的逻辑:先把multiboot传来的模块列表拷贝过来,再把内核本身当作一个额外的模块追加到列表末尾,然后校验这些模块之间是否有区间重叠。最后,将各模块(包括内核)所占的内存空间从kinfo.memmap数组里剔除出去。

当这些做完后,kinfo基本初始化完了。还余下boot_procs、nr_procs、nr_tasks、vm_allocated_bytes等未初始化。

kmain

回到pre_init,接下来还有页初始化相关的代码。

先用pg_clear把页目录清0.

再用pg_identity初始化页目录各项,每一项直接映射一个4M的大页。这个做完就相当于完成了线性地址到物理地址的一一映射。

// kernel/arch/i386/pre_init.c
/*
 * Fill the whole page directory with an identity (1:1) mapping built from
 * 4MB pages, so that code running at its load address keeps working once
 * paging is switched on.
 *
 * cbi: boot info block; cbi->mem_high_phys must be non-zero. Entries whose
 *      4MB page starts at or above it are additionally marked
 *      write-through and cache-disabled.
 */
void pg_identity(kinfo_t *cbi)
{
	uint32_t i;
	phys_bytes phys;

	/* We map memory that does not correspond to physical memory
	 * as non-cacheable. Make sure we know what it is.
	 */
	assert(cbi->mem_high_phys);

	/* Set up an identity mapping page directory */
	for (i = 0; i < I386_VM_DIR_ENTRIES; i++)
	{
		u32_t flags = I386_VM_PRESENT | I386_VM_BIGPAGE | I386_VM_USER | I386_VM_WRITE;
		/* entry i covers the 4MB page that starts at i * 4MB */
		phys = i * I386_BIG_PAGE_SIZE;
		if ((cbi->mem_high_phys & I386_VM_ADDR_MASK_4MB) <= (phys & I386_VM_ADDR_MASK_4MB))
		{
			flags |= I386_VM_PWT | I386_VM_PCD;
		}
		pagedir[i] = phys | flags;
	}
}

接下来的pg_mapkernel比较简单,就是把内核所在的页,去掉I386_VM_USER属性。

/*
 * Map the kernel at its virtual base using 4MB pages.
 *
 * Starting at the page directory slot for kern_vir_start, consecutive slots
 * are pointed at consecutive 4MB chunks of physical memory beginning at
 * kern_phys_start until kern_kernlen bytes are covered. The entries are
 * present and writable and do not carry the user bit.
 *
 * Returns the index of the first page directory slot after the kernel.
 */
int pg_mapkernel(void)
{
	u32_t phys_addr = kern_phys_start;
	u32_t covered;
	int slot;

	/* Both ends of the mapping must sit on a 4MB boundary. */
	assert(!(kern_vir_start % I386_BIG_PAGE_SIZE));
	assert(!(phys_addr % I386_BIG_PAGE_SIZE));

	slot = kern_vir_start / I386_BIG_PAGE_SIZE; /* first kernel slot */
	for (covered = 0; covered < kern_kernlen; covered += I386_BIG_PAGE_SIZE) {
		pagedir[slot] = phys_addr | I386_VM_PRESENT |
				I386_VM_BIGPAGE | I386_VM_WRITE;
		phys_addr += I386_BIG_PAGE_SIZE;
		slot++;
	}

	return slot; /* first slot not used by the kernel */
}

然后将页目录的物理地址写入cr3寄存器

phys_bytes pg_load(void)
{
	phys_bytes phpagedir = vir2phys(pagedir);
	write_cr3(phpagedir);
	return phpagedir;
}

再开启分页

/*
 * Switch the CPU to paged mode with 4MB page support.
 *
 * pre_init() calls this right after pg_load(). The sequence is: clear PG in
 * CR0 and PGE/PSE in CR4, set PSE, set PG, set WP, and finally set PGE if
 * the CPU reports support for it. The order of the register writes matters
 * and must not be rearranged.
 */
void vm_enable_paging(void)
{
	u32_t cr0, cr4;
	int pgeok;

	/* Ask the CPU whether global pages may be used at all. */
	pgeok = _cpufeature(_CPUF_I386_PGE);

#ifdef PAE
	if (_cpufeature(_CPUF_I386_PAE) == 0)
		panic("kernel built with PAE support, CPU seems to lack PAE support?\n");
#endif

	cr0 = read_cr0();
	cr4 = read_cr4();

	/* The boot loader should have put us in protected mode. */
	assert(cr0 & I386_CR0_PE);

	/* First clear PG and PGE flag, as PGE must be enabled after PG. */
	write_cr0(cr0 & ~I386_CR0_PG);
	write_cr4(cr4 & ~(I386_CR4_PGE | I386_CR4_PSE));

	/* Re-read both registers so the values built below start from what
	 * is actually in effect now.
	 */
	cr0 = read_cr0();
	cr4 = read_cr4();

	/* Our page table contains 4MB entries. */
	cr4 |= I386_CR4_PSE;

	write_cr4(cr4);

	/* First enable paging, then enable global page flag. */
	cr0 |= I386_CR0_PG;
	write_cr0(cr0);
	/* Write protect is set in a separate write, after paging is on. */
	cr0 |= I386_CR0_WP;
	write_cr0(cr0);

	/* May we enable these features? */
	if (pgeok)
		cr4 |= I386_CR4_PGE;

	write_cr4(cr4);
}

至此pre_init的工作就完成了。然后就是回到kernel/arch/i386/head.S的代码,将栈切换到k_initial_stktop,准备调用kmain了。

这里的eax就是pre_init返回的kinfo的地址。

	/* Leave the temporary boot stack for the kernel's own stack. */
	mov	$k_initial_stktop, %esp
	push	$0	/* Terminate stack */
	push	%eax	/* boot info pointer returned by pre_init: kmain's argument */
        call    _C_LABEL(kmain)

kmain定义在kernel/main.c里,其形式如下

/* Kernel C entry point. The assembly entry stub calls it on the kernel
 * stack with the boot info pointer that pre_init() returned.
 */
void kmain(kinfo_t *local_cbi);