MINIX3分析二
Minix的入口
Minix3支持multiboot的引导协议
其入口逻辑代码,主要是按multiboot协议定义头部,kernel/arch/i386/kernel.lds
里指定的入口为__k_unpaged_MINIX
,即这里的MINIX
。进入MINIX
后第一条jmp multiboot_init
指令,跳过multiboot的头,直接来到真正的初始化代码。
.text
/*===========================================================================*/
/* MINIX */
/*===========================================================================*/
.global MINIX
MINIX:
/* Entry point of the MINIX kernel. The first instruction jumps over the
 * multiboot header data that follows, so the header is never executed.
 */
jmp multiboot_init
/* Multiboot header, aligned on an 8-byte boundary. */
.balign 8
#define MULTIBOOT_FLAGS (MULTIBOOT_HEADER_WANT_MEMORY | MULTIBOOT_HEADER_MODS_ALIGNED)
multiboot_magic:
.long MULTIBOOT_HEADER_MAGIC
multiboot_flags:
.long MULTIBOOT_FLAGS
/* Negated sum, so that magic + flags + checksum wraps around to zero. */
multiboot_checksum:
.long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_FLAGS)
/* Five header words left as zero.
 * NOTE(review): presumably the header_addr .. entry_addr slots of the
 * multiboot header layout - confirm against the specification.
 */
.long 0
.long 0
.long 0
.long 0
.long 0
/* Video mode: requested mode type, width, height and depth. */
multiboot_mode_type:
.long MULTIBOOT_VIDEO_MODE_EGA
multiboot_width:
.long MULTIBOOT_CONSOLE_COLS
multiboot_height:
.long MULTIBOOT_CONSOLE_LINES
multiboot_depth:
.long 0
multiboot_init:
/* Use the top of the 4096-byte load_stack area (in .data below) as a
 * temporary stack and clear the frame pointer.
 */
mov $load_stack_start, %esp /* make usable stack */
mov $0, %ebp
/* Load 0 into the flags register via the stack. */
push $0 /* set flags to known good state */
popf /* clears nested task and interrupt enable */
/* Call pre_init(magic, ebx). Arguments are pushed last-to-first, so %eax
 * becomes the first argument and %ebx the second.
 * NOTE(review): the leading "push $0" is not consumed by pre_init's two
 * parameters; presumably padding or a terminator - confirm.
 */
push $0
push %ebx /* multiboot information struct */
push %eax /* multiboot magic number */
call _C_LABEL(pre_init)
/* Kernel is mapped high now and ready to go, with
* the boot info pointer returned in %eax. Set the
* highly mapped stack, initialize it, push the boot
* info pointer and jump to the highly mapped kernel.
*/
mov $k_initial_stktop, %esp
push $0 /* Terminate stack */
push %eax /* argument for kmain: pointer returned by pre_init */
call _C_LABEL(kmain)
/* not reached */
/* Safety net: spin forever if kmain ever returns. */
hang:
jmp hang
.data
/* Temporary boot stack: 4096 bytes; load_stack_start labels its end,
 * which is the initial stack top used at multiboot_init.
 */
load_stack:
.space 4096
load_stack_start:
这段代码,首先初始化了一个4096字节大小的栈,栈顶为load_stack_start
,将ebp
、eflags
清0。
再向栈上依次压入三个参数, 0、multiboot传来的参数、multiboot的魔数,调用pre_init
,接过multiboot的接力棒继续完成初始化逻辑。初始化完后,将栈顶切换到k_initial_stktop
,并从kmain
进入。
pre_init
先来看pre_init
的逻辑,它定义在kernel/arch/i386/pre_init.c
。
include/minix/param.h
/* How many processes the system (boot) image contains. */
#define NR_BOOT_PROCS (NR_TASKS + LAST_SPECIAL_PROC_NR + 1)

#ifdef _MINIX_SYSTEM

/* Capacity of the memmap[] array in struct kinfo below. */
#define MAXMEMMAP 40

/*
 * Boot/system information block.  It is filled in during early boot
 * and, per the original note, is what SYS_GETINFO is used to obtain.
 * Field order is kept exactly as in the original definition.
 */
typedef struct kinfo {
	/* --- Data taken straight from the multiboot loader --- */
	multiboot_info_t	mbi;
	multiboot_module_t	module_list[MULTIBOOT_MAX_MODS];
	multiboot_memory_map_t	memmap[MAXMEMMAP];	/* list of free memory */
	phys_bytes		mem_high_phys;
	int			mmap_size;

	/* --- Values derived from the multiboot data --- */
	int	mods_with_kernel;	/* module count, kernel included */
	int	kern_mod;		/* index of the kernel's entry */

	/* --- MINIX data set up during the bootstrap phase --- */
	int	freepde_start;		/* lowest kernel pde not in use */
	char	param_buf[MULTIBOOT_PARAM_BUF_SIZE];

	/* --- General MINIX data --- */
	struct kmessages *kmessages;
	int	do_serial_debug;	/* send system output to serial */
	int	serial_debug_baud;	/* baud rate for serial output */
	int	minix_panicing;		/* set while panicking */
	vir_bytes user_sp;		/* stack location the kernel wants */
	vir_bytes user_end;		/* upper limit for processes */
	vir_bytes vir_kern_start;	/* start of kernel address space */
	vir_bytes bootstrap_start;
	vir_bytes bootstrap_len;
	struct boot_image boot_procs[NR_BOOT_PROCS];
	int	nr_procs;		/* count of user processes */
	int	nr_tasks;		/* count of kernel tasks */
	char	release[6];		/* kernel release string */
	char	version[6];		/* kernel version string */
	int	vm_allocated_bytes;	/* kernel allocation for loading vm */
	int	kernel_allocated_bytes;	/* in use by the kernel */
	int	kernel_allocated_bytes_dynamic;	/* in use by the kernel (runtime) */
} kinfo_t;

#endif /* _MINIX_SYSTEM */
/*
 * Early initialisation, called from the boot assembly before the kernel
 * runs at its high mapping.
 *
 * magic: must equal MULTIBOOT_INFO_MAGIC (asserted).
 * ebx:   address of the boot loader's information; handed to
 *        get_parameters().
 *
 * Returns the address of the global kinfo block so the caller can pass
 * it on to kmain().
 */
kinfo_t *pre_init(u32_t magic, u32_t ebx)
{
	kinfo_t *boot_info = &kinfo;

	assert(magic == MULTIBOOT_INFO_MAGIC);

	/* Copy the boot parameters that ebx points at into our own block;
	 * this is also where the serial-output setting is picked up.
	 */
	get_parameters(ebx, boot_info);

	/* Build and activate a page table: start from a cleared table, add
	 * a 1:1 mapping so the code running right now stays reachable, then
	 * map the kernel where it is meant to live.
	 */
	pg_clear();
	pg_identity(boot_info);
	boot_info->freepde_start = pg_mapkernel();
	pg_load();
	vm_enable_paging();

	/* Hand the boot info back for kmain(). */
	return boot_info;
}
/*
 * Populate the boot information block from the multiboot data.
 *
 * ebx: address of the multiboot information structure supplied by the
 *      boot loader; it is copied into cbi->mbi.
 * cbi: boot information block to fill in.
 *
 * Steps, in order: copy the multiboot info, record kernel/bootstrap
 * addresses from linker symbols, parse "name=value" items from the boot
 * command line into cbi->param_buf, record the architecture and board
 * names, set the user stack/data limits, copy the module list, build the
 * free-memory list, and finally check the modules plus the kernel for
 * overlap while cutting their ranges out of the free-memory list.
 */
void get_parameters(u32_t ebx, kinfo_t *cbi)
{
	multiboot_memory_map_t *mmap;
	multiboot_info_t *mbi = &cbi->mbi;
	int var_i,value_i, m, k;
	char *p;
	extern char _kern_phys_base, _kern_vir_base, _kern_size,
		_kern_unpaged_start, _kern_unpaged_end;
	phys_bytes kernbase = (phys_bytes) &_kern_phys_base,
		kernsize = (phys_bytes) &_kern_size;
#define BUF 1024
	static char cmdline[BUF];

	/* get our own copy of the multiboot info struct and module list */
	memcpy((void *) mbi, (void *) ebx, sizeof(*mbi));

	/* Set various bits of info for the higher-level kernel. */
	cbi->mem_high_phys = 0;
	cbi->user_sp = (vir_bytes) &_kern_vir_base;
	cbi->vir_kern_start = (vir_bytes) &_kern_vir_base;
	cbi->bootstrap_start = (vir_bytes) &_kern_unpaged_start;
	cbi->bootstrap_len = (vir_bytes) &_kern_unpaged_end -
		cbi->bootstrap_start;
	/* NOTE(review): the kinfo definition quoted earlier in this file
	 * names this field "kmessages"; confirm which spelling matches the
	 * header actually in use.
	 */
	cbi->kmess = &kmess;

	/* set some configurable defaults */
	cbi->do_serial_debug = 0;
	cbi->serial_debug_baud = 115200;

	/* parse boot command line */
	if (mbi->mi_flags & MULTIBOOT_INFO_HAS_CMDLINE) {
		static char var[BUF];
		static char value[BUF];

		/* Override values with cmdline argument */
		memcpy(cmdline, (void *) mbi->mi_cmdline, BUF);
		/* memcpy copies exactly BUF bytes and adds no terminator;
		 * force one so the parser below can never run past
		 * cmdline[] when the loader's string is BUF bytes or more.
		 */
		cmdline[BUF - 1] = '\0';
		p = cmdline;
		while (*p) {
			var_i = 0;
			value_i = 0;
			/* skip blanks separating the items */
			while (*p == ' ') p++;
			if (!*p) break;
			/* name: everything up to '=', blank or end */
			while (*p && *p != '=' && *p != ' ' && var_i < BUF - 1)
				var[var_i++] = *p++ ;
			var[var_i] = 0;
			if (*p++ != '=') continue; /* skip if not name=value */
			/* value: everything up to the next blank or end */
			while (*p && *p != ' ' && value_i < BUF - 1)
				value[value_i++] = *p++ ;
			value[value_i] = 0;

			mb_set_param(cbi->param_buf, var, value, cbi);
		}
	}

	/* let higher levels know what we are booting on */
	mb_set_param(cbi->param_buf, ARCHVARNAME, (char *)get_board_arch_name(BOARD_ID_INTEL), cbi);
	mb_set_param(cbi->param_buf, BOARDVARNAME,(char *)get_board_name(BOARD_ID_INTEL) , cbi);

	/* move user stack/data down to leave a gap to catch kernel
	 * stack overflow; and to distinguish kernel and user addresses
	 * at a glance (0xf.. vs 0xe..)
	 */
	cbi->user_sp = USR_STACKTOP;
	cbi->user_end = USR_DATATOP;

	/* kernel bytes without bootstrap code/data that is currently
	 * still needed but will be freed after bootstrapping.
	 */
	kinfo.kernel_allocated_bytes = (phys_bytes) &_kern_size;
	kinfo.kernel_allocated_bytes -= cbi->bootstrap_len;

	assert(!(cbi->bootstrap_start % I386_PAGE_SIZE));
	cbi->bootstrap_len = rounddown(cbi->bootstrap_len, I386_PAGE_SIZE);
	assert(mbi->mi_flags & MULTIBOOT_INFO_HAS_MODS);
	/* strictly less than the maximum: one slot is needed below for
	 * the kernel's own pseudo-module entry
	 */
	assert(mbi->mi_mods_count < MULTIBOOT_MAX_MODS);
	assert(mbi->mi_mods_count > 0);
	memcpy(&cbi->module_list, (void *) mbi->mi_mods_addr,
		mbi->mi_mods_count * sizeof(multiboot_module_t));

	memset(cbi->memmap, 0, sizeof(cbi->memmap));
	/* mem_map has a variable layout */
	if(mbi->mi_flags & MULTIBOOT_INFO_HAS_MMAP) {
		cbi->mmap_size = 0;
		/* Entries are variable-sized: each one is advanced over by
		 * its own mm_size plus the size of the mm_size field.
		 */
		for (mmap = (multiboot_memory_map_t *) mbi->mmap_addr;
			(unsigned long) mmap < mbi->mmap_addr + mbi->mmap_length;
			mmap = (multiboot_memory_map_t *)
				((unsigned long) mmap + mmap->mm_size + sizeof(mmap->mm_size))) {
			if(mmap->mm_type != MULTIBOOT_MEMORY_AVAILABLE) continue;
			add_memmap(cbi, mmap->mm_base_addr, mmap->mm_length);
		}
	} else {
		/* No memory map: fall back to the lower/upper sizes, which
		 * are multiplied by 1024 here to obtain bytes.
		 */
		assert(mbi->mi_flags & MULTIBOOT_INFO_HAS_MEMORY);
		add_memmap(cbi, 0, mbi->mi_mem_lower*1024);
		add_memmap(cbi, 0x100000, mbi->mi_mem_upper*1024);
	}

	/* Sanity check: the kernel nor any of the modules may overlap
	 * with each other. Pretend the kernel is an extra module for a
	 * second.
	 */
	k = mbi->mi_mods_count;
	assert(k < MULTIBOOT_MAX_MODS);
	cbi->module_list[k].mod_start = kernbase;
	cbi->module_list[k].mod_end = kernbase + kernsize;
	cbi->mods_with_kernel = mbi->mi_mods_count+1;
	cbi->kern_mod = k;

	for(m = 0; m < cbi->mods_with_kernel; m++) {
#if 0
		printf("checking overlap of module %08lx-%08lx\n",
		  cbi->module_list[m].mod_start, cbi->module_list[m].mod_end);
#endif
		if(overlaps(cbi->module_list, cbi->mods_with_kernel, m))
			panic("overlapping boot modules/kernel");
		/* We cut out the bits of memory that we know are
		 * occupied by the kernel and boot modules.
		 */
		cut_memmap(cbi,
			cbi->module_list[m].mod_start,
			cbi->module_list[m].mod_end);
	}
}
Minix3的Boot Options
经常使用的
Parameter | Description |
---|---|
cttyline=0 | Used for serial debugging; parameter selects serial port. |
cttybaud= | Sets serial baud rate. 115200 is recommended. |
bootopts=-s | Enables single-user mode. |
no_apic= | |
no_smp= | |
pci_debug=1 | Enable PCI driver debug prints. |
rootdevname= | E.g. c0d0p0s0; Sets the partition from which to load boot components. |
watchdog= | |
不常用的
Parameter | Description |
---|---|
acpi=yes | Use the ACPI driver. |
ahci=yes | Use the AHCI driver instead of the “at_wini” IDE driver (experimental!). Many other ahci= options are available to help debugging; see the code. |
ata_no_dma=1 | Disable DMA. |
ata_pci_debug=1 | Enable PCI-related debugging info in at_wini driver. Some other ata= options are available to help debugging; see the code. |
atapi_dma=1 | Enable ATAPI DMA. |
bios_wini=yes | Use the BIOS driver instead of the “at_wini” IDE driver. |
bios_remap_first=1 | Force the boot drive to be remapped as d7; this allows a machine booted from an inserted media (like a faked disk image from CD booting or a USB thumb drive) to retain its normal drive assignation. |
console=8003 | Enable 50 lines for the text-mode console. |
debug_fkeys=1 | Enable pressing F1..F10 keys to show information about the system on the console. |
disable= | Prevent one daemon (optional service) from starting; useful when debugging, or if some piece of hardware has problems; disable=inet will prevent the INET network stack from starting; disable=lwip,lance will prevent both the LWIP network stack and the LANCE driver from starting. |
hz=60 | Set the system heart beat. |
label= | Allows the binding of several configurations in a single image. |
memory= | Describe the installed memory in the computer; useful to override incorrect or faulty detection or to fake a memory-starved machine. |
oxpcie= | document me! |
rootdev= | document me! Also bootdev, ramimagedev, and ramsize, described in the monitor(8) manpage. |
rs_verbose=1 | Enable debugging of Resurrection Server. |
skip_boot_config=1 | Prevents the service edit operation on the booting processes (??? document me better!) |
sticky_alt=1 | Changes the behaviour of the Alt key to match Unix practice (Meta key). |
verbose=1 | Enable debugging while loading and starting kernel; verbose=2 is much more verbose. |
vm_sanitychecklevel= | Enable debugging of VM Server. |
cbi->user_sp = (vir_bytes) &_kern_vir_base;
cbi->vir_kern_start = (vir_bytes) &_kern_vir_base;
_kern_vir_base == 0xF0400000
,定义在kernel.lds
里。内核虚拟地址入口为该地址,栈顶也是该地址。
__k_unpaged__kern_unpaged_start
,地址为0x00400000
_kern_phys_base = 0x00400000;
. = _kern_phys_base;
__k_unpaged__kern_unpaged_start = .;
.unpaged_text : { unpaged_*.o(.text) }
.unpaged_data ALIGN(4096) : { unpaged_*.o(.data .rodata*) }
.unpaged_bss ALIGN(4096) : { unpaged_*.o(.bss COMMON) }
__k_unpaged__kern_unpaged_end = .;
cbi->bootstrap_start = (vir_bytes) &_kern_unpaged_start;
cbi->bootstrap_len = (vir_bytes) &_kern_unpaged_end -
cbi->bootstrap_start;
因此这就是在设置内核未页映射前的text
、data
、bss
所占的大小
接下来就是解析传给内核的cmdline字符串,这个字符串只接受 name=value
形式的参数,由空格分隔不同的name=value
组。根据解析逻辑来看name=value
必须连在一起,'='
两边不能有空格,也不支持name
或value
有空格。如果不是name=value
的参数会被忽略掉,且name
或value
的最大长度都是BUF-1
即1023
。
cbi->user_sp = USR_STACKTOP;
cbi->user_end = USR_DATATOP;
USR_STACKTOP
和USR_DATATOP
的值都是0xF0000000
,与内核的0xF0400000
隔了一段距离。
kinfo.kernel_allocated_bytes = (phys_bytes) &_kern_size;
kinfo.kernel_allocated_bytes -= cbi->bootstrap_len;
这两行就是排除启动阶段占用的空间,计算出内核实际占用的空间。
/*
 * Store "name=value" in the parameter buffer.
 *
 * bigbuf: buffer of MULTIBOOT_PARAM_BUF_SIZE bytes holding entries of
 *         the form "name=value", each ended by a NUL, with one extra
 *         NUL after the last entry.
 * name:   parameter name.
 * value:  parameter value.
 * cbi:    boot info block; its serial debug fields are updated when
 *         name is SERVARNAME or SERBAUDVARNAME.
 *
 * An existing entry with the same name is removed first, then the new
 * entry is appended.  Returns 0 on success, or -1 when the buffer has
 * no room for the new entry.
 */
static int mb_set_param(char *bigbuf, char *name, char *value, kinfo_t *cbi)
{
	char *p = bigbuf;
	char *bufend = bigbuf + MULTIBOOT_PARAM_BUF_SIZE;
	char *q;
	int namelen = strlen(name);
	int valuelen = strlen(value);

	/* Some variables we recognize */
	if(!strcmp(name, SERVARNAME)) { cbi->do_serial_debug = 1; }
	if(!strcmp(name, SERBAUDVARNAME)) { cbi->serial_debug_baud = atoi(value); }

	/* Delete the item if already exists */
	while (p < bufend && *p) {
		if (strncmp(p, name, namelen) == 0 && p[namelen] == '=') {
			/* Find the end of this entry, then shift everything
			 * after it down over the entry.
			 */
			q = p;
			while (*q) q++;
			for (q++; q < bufend; q++, p++)
				*p = *q;
			break;
		}
		/* Step over this entry.  The post-increment also consumes
		 * the terminating NUL, leaving p on the first byte of the
		 * next entry; an additional increment here would skip that
		 * byte and make every entry but the first unmatchable.
		 */
		while (p < bufend && *p++)
			;
	}

	/* Locate the end of the list: the first spot with two NULs in a
	 * row.  The bound keeps the look-ahead inside the buffer.
	 */
	for (p = bigbuf; p + 1 < bufend && (*p || *(p + 1)); p++)
		;
	/* Unless the list is empty, step past the last entry's NUL. */
	if (p > bigbuf) p++;

	/* Make sure there's enough space for the new parameter */
	if (p + namelen + valuelen + 3 > bufend)
		return -1;

	strcpy(p, name);
	p[namelen] = '=';
	strcpy(p + namelen + 1, value);
	/* entry terminator followed by the list terminator */
	p[namelen + valuelen + 1] = 0;
	p[namelen + valuelen + 2] = 0;
	return 0;
}
mb_set_param
这个函数的作用其实就是把name=value
的参数放到bigbuf
里,bigbuf
数据是以\0
分隔的,如name1=value1\0name2=value2...
, 如果有两个相同的name
,mb_set_param
会删除之前的内容,将新的name=value
加进bigbuf
。
接下来的MULTIBOOT_INFO_HAS_MMAP
add_memmap
相关的代码,处理的是multiboot传给内核的内存信息:内存被描述为若干个区间,有的区间可用,有的不可用。类型为MULTIBOOT_MEMORY_AVAILABLE
的内存区间就通过add_memmap
加到kinfo.memmap
数组里。
最后是模块相关的逻辑:先把multiboot传来的模块列表拷贝到kinfo.module_list,再把内核自身作为最后一个“模块”追加进去,然后校验这些模块(含内核)之间是否有区间重叠,并通过cut_memmap把它们所占的内存空间从kinfo.memmap
数组记录的可用内存中剔除。
当这些做完后,kinfo
基本初始化完了。还余下boot_procs
、nr_procs
、nr_tasks
、vm_allocated_bytes
等未初始化。
kmain
回到pre_init
,接下来还有页初始化相关的代码。
先用pg_clear
把页目录清0.
再用pg_identity
初始化页目录各项,每一项直接映射4M页。这个做完就相当于完成了线性地址
到物理地址
的一一映射。
// kernel/arch/i386/pre_init.c
/*
 * Fill the whole page directory with an identity (1:1) mapping made of
 * big pages: entry i maps the address i * I386_BIG_PAGE_SIZE.
 *
 * cbi: boot info block; cbi->mem_high_phys must be non-zero (asserted).
 *      Entries at or above it are additionally marked non-cacheable.
 */
void pg_identity(kinfo_t *cbi)
{
	uint32_t i;
	phys_bytes phys;

	/* We map memory that does not correspond to physical memory
	 * as non-cacheable. Make sure we know what it is.
	 */
	assert(cbi->mem_high_phys);

	/* Set up an identity mapping page directory */
	for (i = 0; i < I386_VM_DIR_ENTRIES; i++)
	{
		/* Every entry is a present, writable, user-accessible
		 * big page.
		 */
		u32_t flags = I386_VM_PRESENT | I386_VM_BIGPAGE | I386_VM_USER | I386_VM_WRITE;

		phys = i * I386_BIG_PAGE_SIZE;
		/* Compare at big-page granularity: is this page at or
		 * beyond mem_high_phys?
		 */
		if ((cbi->mem_high_phys & I386_VM_ADDR_MASK_4MB) <= (phys & I386_VM_ADDR_MASK_4MB))
		{
			flags |= I386_VM_PWT | I386_VM_PCD;
		}
		pagedir[i] = phys | flags;
	}
}
接下来的pg_mapkernel
比较简单,就是从kern_vir_start对应的页目录项开始,把内核的虚拟地址按4M大页映射到内核所在的物理地址(kern_phys_start),这些页目录项不带I386_VM_USER
属性,最后返回内核之后第一个空闲的页目录项下标。
/*
 * Map the kernel into the page directory with big pages: virtual
 * addresses starting at kern_vir_start are pointed at physical
 * addresses starting at kern_phys_start until kern_kernlen bytes are
 * covered.  The entries are present and writable, without the user bit.
 *
 * Returns the index of the first page directory entry after the ones
 * written here.
 */
int pg_mapkernel(void)
{
	const u32_t kernel_flags = I386_VM_PRESENT | I386_VM_BIGPAGE | I386_VM_WRITE;
	u32_t phys_addr = kern_phys_start;
	u32_t covered;
	int slot;

	/* Both ends of the mapping must sit on a big-page boundary. */
	assert(!(kern_vir_start % I386_BIG_PAGE_SIZE));
	assert(!(phys_addr % I386_BIG_PAGE_SIZE));

	/* First directory slot belonging to the kernel's virtual range. */
	slot = kern_vir_start / I386_BIG_PAGE_SIZE;

	for (covered = 0; covered < kern_kernlen; covered += I386_BIG_PAGE_SIZE) {
		pagedir[slot++] = phys_addr | kernel_flags;
		phys_addr += I386_BIG_PAGE_SIZE;
	}

	/* slot now names the first unused entry. */
	return slot;
}
然后将页目录的物理地址写入cr3
寄存器
phys_bytes pg_load(void)
{
phys_bytes phpagedir = vir2phys(pagedir);
write_cr3(phpagedir);
return phpagedir;
}
再开启分页
/*
 * Switch the CPU to paged mode by programming CR0 and CR4.
 *
 * The sequence is: clear PG (CR0) and PGE/PSE (CR4), set PSE, set PG,
 * set WP, and finally set PGE when the CPU reports support for it.
 * The visible caller (pre_init) runs pg_load() before this function.
 * The order of the register writes matters and must not be changed.
 */
void vm_enable_paging(void)
{
u32_t cr0, cr4;
int pgeok;
/* Query the PGE feature up front; the result is used at the very end. */
pgeok = _cpufeature(_CPUF_I386_PGE);
#ifdef PAE
/* A PAE build cannot continue on a CPU that does not report PAE. */
if (_cpufeature(_CPUF_I386_PAE) == 0)
panic("kernel built with PAE support, CPU seems to lack PAE support?\n");
#endif
cr0 = read_cr0();
cr4 = read_cr4();
/* The boot loader should have put us in protected mode. */
assert(cr0 & I386_CR0_PE);
/* First clear PG and PGE flag, as PGE must be enabled after PG. */
write_cr0(cr0 & ~I386_CR0_PG);
write_cr4(cr4 & ~(I386_CR4_PGE | I386_CR4_PSE));
/* Re-read both registers so the values below start from the cleared state. */
cr0 = read_cr0();
cr4 = read_cr4();
/* Our page table contains 4MB entries. */
cr4 |= I386_CR4_PSE;
write_cr4(cr4);
/* First enable paging, then enable global page flag. */
cr0 |= I386_CR0_PG;
write_cr0(cr0);
/* WP is set in a separate write, after paging is already on. */
cr0 |= I386_CR0_WP;
write_cr0(cr0);
/* May we enable these features? */
/* Only the OR into cr4 is conditional; the write_cr4() that follows
 * runs in either case.
 */
if (pgeok)
cr4 |= I386_CR4_PGE;
write_cr4(cr4);
}
至此pre_init
的工作就完成了。然后就是回到kernel/arch/i386/head.S
的代码,将栈切换到k_initial_stktop
,准备调用kmain
了。
这里的eax
就是pre_init
返回的kinfo
的地址。
mov $k_initial_stktop, %esp
push $0 /* Terminate stack */
push %eax
call _C_LABEL(kmain)
kmain
定义在kernel/main.c
里,其形式如下
void kmain(kinfo_t *local_cbi);