操作系统实验三
基于 Linux 6.6.27
为什么我的initramfs里明明没有 /dev、/root 两个目录和 /dev/console 文件,而在启动init脚本后,却会出现这些目录和文件?
所以写个C程序,替换掉init脚本,这个程序的目的就是直接列出 / 目录下所有文件名
#include <dirent.h>
#include <stdio.h>

/*
 * Minimal replacement for the initramfs init script: print the name of
 * every entry found in "/" (as returned by readdir(), so "." and ".." are
 * included) to stdout, one name per line.
 *
 * Returns 0 on success, or 1 if "/" cannot be opened.
 */
int main(void)
{
    DIR *dir;
    struct dirent *entry;

    dir = opendir("/");
    if (dir == NULL) {
        perror("Error opening root directory");
        return 1;
    }

    while ((entry = readdir(dir)) != NULL) {
        printf("%s\n", entry->d_name);
    }

    closedir(dir);
    return 0;
}
需要静态编译,否则initramfs里没有动态链接器和共享库,内核执行 /init 会失败(error -2 即 ENOENT):
[ 1.910634][ T1] Run /init as init process
[ 1.912440][ T1] Failed to execute /init (error -2)
经过测试,列出如下
.
..
dev
root
bin
init
这说明,并不是因为加载脚本默认创建的,而是在内核一早就准备好了的。
那是在什么时候创建的呢?
接下来,在内核代码init/initramfs.c
里添加这段代码
/*
 * dir_context actor used by print_rootfs_files(): log one directory entry.
 * Only @name and @namlen are used; the other arguments are ignored.
 * Always returns true.
 */
static bool print_file_name(struct dir_context *ctx, const char *name,
			    int namlen, loff_t offset, u64 ino,
			    unsigned int d_type)
{
	/* The name is printed with an explicit length (%.*s with namlen). */
	printk(" rootfs / file: %.*s\n", namlen, name);

	return true;
}
/*
 * Debug helper: log the name of every entry in the directory @rootfs_path.
 *
 * @rootfs_path: directory to list (opened with O_RDONLY | O_DIRECTORY)
 * @func:        name of the calling function, printed in the header line
 * @line:        line number of the call site, printed in the header line
 *
 * Failures to open or to iterate the directory are reported with pr_err()
 * and are otherwise non-fatal.
 */
static void print_rootfs_files(const char *rootfs_path, const char *func, int line)
{
	struct file *file;
	struct dir_context ctx = {
		.actor = print_file_name,
	};
	int err;

	/* Header line identifying the call site; terminated with a newline. */
	printk("%s:%s:%d\n", __func__, func, line);

	file = filp_open(rootfs_path, O_RDONLY | O_DIRECTORY, 0);
	if (IS_ERR(file)) {
		pr_err("Failed to open rootfs directory: %ld\n", PTR_ERR(file));
		return;
	}

	/* Report an iteration failure instead of silently dropping it. */
	err = iterate_dir(file, &ctx);
	if (err)
		pr_err("Failed to list rootfs directory: %d\n", err);

	filp_close(file, NULL);
}
并在unpack_to_rootfs
之后调用
得到结果
[ 0.832556][ T29] print_rootfs_files:do_populate_rootfs:724
[ 0.832556][ T29] rootfs / file: .
[ 0.832556][ T29] rootfs / file: ..
[ 0.836995][ T29] print_rootfs_files:do_populate_rootfs:726
[ 0.837040][ T29] rootfs / file: .
[ 0.837185][ T29] rootfs / file: ..
[ 0.837307][ T29] rootfs / file: dev
[ 0.837412][ T29] rootfs / file: root
[ 0.837545][ T29] Unpacking initramfs...
[ 0.845110][ T29] unpack_to_rootfs filp open bin/busybox
[ 0.918683][ T29] unpack_to_rootfs filp open init
[ 0.935362][ T29] print_rootfs_files:do_populate_rootfs:739
[ 0.935439][ T29] rootfs / file: .
[ 0.935592][ T29] rootfs / file: ..
[ 0.935592][ T29] rootfs / file: dev
[ 0.935592][ T29] rootfs / file: root
[ 0.935592][ T29] rootfs / file: bin
[ 0.935592][ T29] rootfs / file: init
可以看出,在do_populate_rootfs中第一次调用unpack_to_rootfs之前,rootfs里还没有 /dev 和 /root;而第一次调用之后,/dev 和 /root 就已经创建好了
/*
 * Populate rootfs in up to two steps:
 *  1. unpack the image described by __initramfs_start/__initramfs_size
 *     (a failure here panics);
 *  2. if an initrd region is present and CONFIG_INITRAMFS_FORCE is not
 *     enabled, unpack the region [initrd_start, initrd_end) as well.
 * Afterwards the initrd memory is freed when the conditions at "done:" hold,
 * and initrd_start/initrd_end are reset to 0.
 */
static void __init do_populate_rootfs(void *unused, async_cookie_t cookie)
{
/* Load the built in initramfs */
char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err)
panic_show_mem("%s", err); /* Failed to decompress INTERNAL initramfs */
/* No initrd region, or the built-in image is forced: skip the second unpack. */
if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
goto done;
if (IS_ENABLED(CONFIG_BLK_DEV_RAM))
printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
else
printk(KERN_INFO "Unpacking initramfs...\n");
/* Second unpack: the initrd region itself. */
err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
if (err) {
/* With CONFIG_BLK_DEV_RAM the failure is handed to populate_initrd_image(); otherwise it is only logged. */
#ifdef CONFIG_BLK_DEV_RAM
populate_initrd_image(err);
#else
printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
#endif
}
done:
/*
* If the initrd region is overlapped with crashkernel reserved region,
* free only memory that is not part of crashkernel region.
*/
if (!do_retain_initrd && initrd_start && !kexec_free_initrd())
free_initrd_mem(initrd_start, initrd_end);
/* Mark the initrd region as gone. */
initrd_start = 0;
initrd_end = 0;
flush_delayed_fput();
task_work_run();
}
可以看到这两次unpack_to_rootfs
是有明显的差异的
一次是:unpack_to_rootfs(__initramfs_start, __initramfs_size);
这行代码处理的是内嵌在内核映像中的 initramfs。在编译内核时,可以选择将 initramfs 直接编译进内核映像中。__initramfs_start 和 __initramfs_size 是由编译器在链接阶段设置的,它们分别表示内嵌 initramfs 的开始地址和大小。 内嵌 initramfs 的生成过程是通过内核的配置和编译系统(Kconfig 和 Makefile)完成的。具体来说,在内核配置文件(通常是 .config 文件)中,有一个选项叫做 CONFIG_INITRAMFS_SOURCE,它指定了用于生成内嵌 initramfs 的源文件或目录的路径。
# usr/Makefile
# Input for the built-in initramfs, taken from CONFIG_INITRAMFS_SOURCE.
ramfs-input := $(CONFIG_INITRAMFS_SOURCE)
# Starts out empty; how it is filled in is not shown in this excerpt.
cpio-data :=
# If CONFIG_INITRAMFS_SOURCE is empty, generate a small initramfs with the
# default contents.
ifeq ($(ramfs-input),)
# Fall back to the default_cpio_list file in this directory of the source tree.
ramfs-input := $(srctree)/$(src)/default_cpio_list
endif
usr/default_cpio_list
的内容为
# SPDX-License-Identifier: GPL-2.0-only
# This is a very simple, default initramfs
# Entries below: a /dev directory, a /dev/console node and a /root directory.
# NOTE(review): the numeric fields look like mode, uid, gid (and for "nod":
# device type, major, minor) -- confirm against the gen_init_cpio usage text.
dir /dev 0755 0 0
nod /dev/console 0600 0 0 c 5 1
dir /root 0700 0 0
一次是:unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
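这行代码处理的是引导加载器传递给内核的外部映像。这个映像是一个单独的文件,需要在引导加载器(如 GRUB)中显式地传递给内核。initrd_start 和 initrd_end 由引导加载器设置,它们表示该映像在内存中的开始和结束地址。它既可以是 cpio 格式的 initramfs(例如本文的 initramfs.cpio.gz,会被直接解包到 rootfs),也可以是旧式的 initrd 映像。initrd 是一种旧式的 RAM 磁盘技术,它和 initramfs 功能类似,但是使用方式不同。从上面的代码可以看到,只有当解包失败并且启用了 CONFIG_BLK_DEV_RAM 时,才会调用 populate_initrd_image 按旧式 initrd 处理。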
因此 /dev、/root、/dev/console 既不是initramfs.cpio.gz里带的,也不是内核代码在运行时专门创建的,而是编译时和内核嵌在一起的默认initramfs(由usr/default_cpio_list生成)里的内容。
修改usr/default_cpio_list
的内容,重新编译启动内核就能轻松验证。
如果在启动内核的时候没有指定initramfs.cpio.gz,或者里面没有init程序,或者参数rdinit指定的程序不存在,内核都会自己尝试挂载根文件系统【但要指定root参数】
如果不使用initramfs,让内核自己挂载根文件系统,init进程不能再用rdinit参数指定,而要用init参数指定;如果没有指定init参数,内核先尝试CONFIG_DEFAULT_INIT(非空时),再依次尝试 /sbin/init、/etc/init、/bin/init、/bin/sh
下面来看内核自己挂载硬盘(以/dev/sda1为例)为根目录的情况【可以删除initramfs.cpio.gz里的init,或不指定initramfs.cpio.gz来触发】
prepare_namespace调用mount_root将/dev/sda1挂载到/root目录下(do_mount_root里还会调用init_chdir把当前进程的工作目录切换到/root)。 devtmpfs_mount将devtmpfs挂载到/dev目录下。 init_mount(".", "/", NULL, MS_MOVE, NULL)把当前工作目录上的挂载移动到 / 上:在这个调用中,. 表示当前进程的工作目录(也就是刚挂载好的根文件系统),/ 表示移动的目标位置,MS_MOVE 是挂载标志,表示移动一个已有的挂载,而不是新建一个挂载。此时进程的根目录仍然指向原来的rootfs。 init_chroot(".")再将当前进程的根目录更改为当前工作目录,也就是挂载好的根文件系统。这样之后通过 / 访问到的就是挂载的根文件系统中的文件。
// init/do_mounts.c
/*
* Prepare the namespace - decide what/where to mount, load ramdisks, etc.
*
* The steps run strictly in the order written below; the function ends by
* moving the mount at the current directory onto "/" and then making the
* current directory the root of the calling process.
*/
void __init prepare_namespace(void)
{
/* Optional delay of root_delay seconds before touching the root device. */
if (root_delay) {
printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
root_delay);
ssleep(root_delay);
}
/*
* wait for the known devices to complete their probing
*
* Note: this is a potential source of long boot delays.
* For example, it is not atypical to wait 5 seconds here
* for the touchpad of a laptop to initialize.
*/
wait_for_device_probe();
md_run_setup();
/* A root device name was saved: translate it into ROOT_DEV. */
if (saved_root_name[0])
ROOT_DEV = parse_root_device(saved_root_name);
/* A non-zero return from initrd_load() skips wait_for_root()/mount_root(). */
if (initrd_load(saved_root_name))
goto out;
if (root_wait)
wait_for_root(saved_root_name);
mount_root(saved_root_name);
out:
devtmpfs_mount();
/* MS_MOVE: relocate the existing mount at "." onto "/" (see mount(2)). */
init_mount(".", "/", NULL, MS_MOVE, NULL);
/* Make "." the root directory of the current process. */
init_chroot(".");
}
init_mount(".", "/", NULL, MS_MOVE, NULL); 把进程当前目录上的挂载移动到 '/' 之后,硬盘的文件系统就挂在了 '/' 上。
但进程的根目录此时仍然指向原来的rootfs,所以在init_mount之后调用print_rootfs_files,会发现列出的还是rootfs的内容:/dev、/dev/root、/dev/console、/root。
在init_chroot之后调用print_rootfs_files,列出的才是硬盘目录。
/*
 * Mount the root filesystem, choosing the method from ROOT_DEV:
 * NFS, CIFS and "generic" roots have dedicated helpers; everything else
 * ends up in mount_block_root(), except that for ROOT_DEV == 0 a
 * successful mount_nodev_root() is tried first when both a device name
 * and root_fs_names are available.
 */
void __init mount_root(char *root_device_name)
{
	if (ROOT_DEV == Root_NFS) {
		mount_nfs_root();
		return;
	}

	if (ROOT_DEV == Root_CIFS) {
		mount_cifs_root();
		return;
	}

	if (ROOT_DEV == Root_Generic) {
		mount_root_generic(root_device_name, root_device_name,
				   root_mountflags);
		return;
	}

	/* ROOT_DEV == 0: a successful nodev mount means we are done. */
	if (ROOT_DEV == 0 && root_device_name && root_fs_names &&
	    mount_nodev_root(root_device_name) == 0)
		return;

	/* Every remaining case, including a failed nodev attempt. */
	mount_block_root(root_device_name);
}
在mount_block_root里,在rootfs的/dev里创建一个root设备节点
// init/do_mounts.c
/*
 * Create the device node /dev/root for ROOT_DEV and mount the root
 * filesystem from it. A failure to create the node is logged, and the
 * mount is attempted regardless.
 */
static void __init mount_block_root(char *root_device_name)
{
	int rc;

	rc = create_dev("/dev/root", ROOT_DEV);
	if (rc < 0)
		pr_emerg("Failed to create /dev/root: %d\n", rc);

	/* root_device_name is passed as the "pretty" name. */
	mount_root_generic("/dev/root", root_device_name, root_mountflags);
}
然后使用mount_root_generic将/dev/root挂载到rootfs上
由于此处只有设备节点,但是没有文件系统信息,所以mount_root_generic会遍历内核所有的文件系统名,依次来尝试用do_mount_root挂载/dev/root到rootfs的/root目录。
/*
 * Mount the root filesystem from @name, trying candidate filesystem types
 * one after another through do_mount_root().
 *
 * @name:        path handed to do_mount_root() as the thing to mount
 * @pretty_name: name used only in the "Cannot open root device" message
 * @flags:       mount flags; SB_RDONLY is added for a second pass if the
 *               first pass fails with every filesystem type
 *
 * Returns only on success; every failure path ends in panic().
 */
void __init mount_root_generic(char *name, char *pretty_name, int flags)
{
/* One page is used as the buffer holding the list of filesystem names. */
/* NOTE(review): the alloc_page() result is used without a NULL check. */
struct page *page = alloc_page(GFP_KERNEL);
char *fs_names = page_address(page);
char *p;
char b[BDEVNAME_SIZE];
int num_fs, i;
/* Description of ROOT_DEV (major, minor) used in the messages below. */
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
/* Candidates come from split_fs_names() when root_fs_names is set, else from list_bdev_fs_names(). */
if (root_fs_names)
num_fs = split_fs_names(fs_names, PAGE_SIZE);
else
num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
retry:
/* fs_names holds num_fs NUL-terminated names stored back to back. */
for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) {
int err;
/* Skip empty names. */
if (!*p)
continue;
err = do_mount_root(name, p, flags, root_mount_data);
switch (err) {
case 0:
goto out;
/* These two errors just mean "try the next filesystem type". */
case -EACCES:
case -EINVAL:
continue;
}
/*
* Allow the user to distinguish between failed sys_open
* and bad superblock on root device.
* and give them a list of the available devices
*/
printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
pretty_name, b, err);
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
printk_all_partitions();
/* With root_fs_names set, re-fill the buffer from list_bdev_fs_names() for the listing below. */
if (root_fs_names)
num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
if (!num_fs)
pr_err("Can't find any bdev filesystem to be used for mount!\n");
else {
pr_err("List of all bdev filesystems:\n");
for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1)
pr_err(" %s", p);
pr_err("\n");
}
panic("VFS: Unable to mount root fs on %s", b);
}
/* Every type failed with -EACCES/-EINVAL: retry once with SB_RDONLY added. */
if (!(flags & SB_RDONLY)) {
flags |= SB_RDONLY;
goto retry;
}
/* The read-only pass failed as well: list what was tried and give up. */
printk("List of all partitions:\n");
printk_all_partitions();
printk("No filesystem could mount root, tried: ");
for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1)
printk(" %s", p);
printk("\n");
panic("VFS: Unable to mount root fs on %s", b);
out:
/* Success: release the name buffer. */
put_page(page);
}
为什么是挂载到/root目录呢,可以在do_mount_root逻辑里看到init_mount(name, "/root", fs, flags, data_page);
即 init_mount("/dev/root", "/root", fs, flags, data_page)
这个/root目录可以换成别的目录吗? 当然可以,假设为/rootA。 先修改usr/default_cpio_list,dir /rootA 0700 0 0
。然后修改do_mount_root函数里的init_mount和init_chdir里的/root为/rootA
/*
 * Try to mount @name on "/root" as filesystem type @fs.
 *
 * @name:  what to mount
 * @fs:    filesystem type name
 * @flags: mount flags
 * @data:  optional mount data string; copied into a freshly allocated page
 *
 * On success the current directory is changed to "/root", ROOT_DEV is set
 * to the device of the mounted superblock, and a message is logged.
 *
 * Returns 0 on success, -ENOMEM if the data page cannot be allocated, or
 * the error returned by init_mount().
 */
static int __init do_mount_root(const char *name, const char *fs,
				 const int flags, const void *data)
{
	struct page *page = NULL;
	char *mount_opts = NULL;
	int err;

	if (data) {
		/* init_mount() requires a full page as fifth argument */
		page = alloc_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;
		mount_opts = page_address(page);

		/* zero-pad. init_mount() will make sure it's terminated */
		strncpy(mount_opts, data, PAGE_SIZE);
	}

	err = init_mount(name, "/root", fs, flags, mount_opts);
	if (!err) {
		struct super_block *sb;

		init_chdir("/root");
		sb = current->fs->pwd.dentry->d_sb;
		ROOT_DEV = sb->s_dev;
		printk(KERN_INFO
		       "VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
		       sb->s_type->name,
		       sb_rdonly(sb) ? " readonly" : "",
		       MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
	}

	/* The data page is only needed for the duration of the mount call. */
	if (page)
		put_page(page);

	return err;
}
补充
// fs/namespace.c
/*
 * Set up the mount infrastructure: the mount cache, the two mount hash
 * tables, kernfs/sysfs, the "fs" kobject, shmem, and finally rootfs and the
 * initial mount tree. Panics if either hash table cannot be allocated;
 * sysfs and kobject failures are only logged as warnings.
 */
void __init mnt_init(void)
{
int err;
/* Slab cache for struct mount objects (SLAB_PANIC is among the flags). */
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
mount_hashtable = alloc_large_system_hash("Mount-cache",
sizeof(struct hlist_head),
mhash_entries, 19,
HASH_ZERO,
&m_hash_shift, &m_hash_mask, 0, 0);
mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
sizeof(struct hlist_head),
mphash_entries, 19,
HASH_ZERO,
&mp_hash_shift, &mp_hash_mask, 0, 0);
if (!mount_hashtable || !mountpoint_hashtable)
panic("Failed to allocate mount hash table\n");
kernfs_init();
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
__func__, err);
fs_kobj = kobject_create_and_add("fs", NULL);
if (!fs_kobj)
printk(KERN_WARNING "%s: kobj create error\n", __func__);
shmem_init();
/* rootfs setup first, then the initial mount tree built on top of it. */
init_rootfs();
init_mount_tree();
}
// init/do_mounts.c
/*
 * Decide whether rootfs is flagged as tmpfs (is_tmpfs).
 *
 * Only considered when CONFIG_TMPFS is enabled. The flag is set when
 * root_fs_names is given and contains "tmpfs", or when neither
 * root_fs_names nor a saved root device name is present. Otherwise
 * is_tmpfs is left untouched.
 */
void __init init_rootfs(void)
{
	bool use_tmpfs;

	if (!IS_ENABLED(CONFIG_TMPFS))
		return;

	if (root_fs_names)
		use_tmpfs = strstr(root_fs_names, "tmpfs") != NULL;
	else
		use_tmpfs = !saved_root_name[0];

	if (use_tmpfs)
		is_tmpfs = true;
}
// fs/namespace.c
/*
 * Create a mount of filesystem @type through a temporary fs_context.
 *
 * @type:  filesystem type; NULL yields ERR_PTR(-EINVAL)
 * @flags: flags passed to fs_context_for_mount()
 * @name:  optional "source" string parsed into the context
 * @data:  mount data handed to parse_monolithic_mount_data()
 *
 * Returns the vfsmount from fc_mount(), or an ERR_PTR() carrying the first
 * error hit while building or parsing the context. The context is always
 * released before returning.
 */
struct vfsmount *vfs_kern_mount(struct file_system_type *type,
				int flags, const char *name,
				void *data)
{
	struct fs_context *ctx;
	struct vfsmount *result;
	int err = 0;

	if (!type)
		return ERR_PTR(-EINVAL);

	ctx = fs_context_for_mount(type, flags);
	if (IS_ERR(ctx))
		return ERR_CAST(ctx);

	if (name)
		err = vfs_parse_fs_string(ctx, "source",
					  name, strlen(name));
	if (!err)
		err = parse_monolithic_mount_data(ctx, data);

	/* Only attempt the mount when both parsing steps succeeded. */
	result = err ? ERR_PTR(err) : fc_mount(ctx);

	put_fs_context(ctx);
	return result;
}
/*
 * Create the rootfs mount, wrap it in a newly allocated mount namespace,
 * install that namespace for init_task, and make the rootfs root both the
 * working directory and the root directory of the current task.
 * Panics if the mount or the namespace cannot be created.
 */
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
struct mount *m;
struct mnt_namespace *ns;
struct path root;
/* Mount rootfs with no flags and no mount data. */
mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
if (IS_ERR(mnt))
panic("Can't create rootfs");
ns = alloc_mnt_ns(&init_user_ns, false);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
/* Link the mount and the namespace: it is the namespace's root and only mount. */
m = real_mount(mnt);
m->mnt_ns = ns;
ns->root = m;
ns->mounts = 1;
list_add(&m->mnt_list, &ns->list);
init_task.nsproxy->mnt_ns = ns;
get_mnt_ns(ns);
/* Path describing the root of the rootfs mount. */
root.mnt = mnt;
root.dentry = mnt->mnt_root;
mnt->mnt_flags |= MNT_LOCKED;
/* Both pwd and root of the current task now point at the rootfs root. */
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
}
/*
 * Abridged excerpt: only the call relevant to rootfs setup is shown; the
 * "// ..." markers stand for code omitted from this listing.
 */
void __init vfs_caches_init(void)
{
// ...
/* Sets up the mount infrastructure, including rootfs (see mnt_init()). */
mnt_init();
// ...
}
/*
 * Abridged excerpt: shows only that vfs_caches_init() is called before
 * arch_call_rest_init(); the "// ..." markers stand for omitted code.
 */
void start_kernel(void)
{
// ...
vfs_caches_init();
// ...
/* Continues in rest_init() via arch_call_rest_init(). */
arch_call_rest_init();
}
/*
 * Abridged excerpt: shows only the creation of the thread that runs
 * kernel_init(); the "// ..." markers stand for omitted code.
 */
noinline void __ref __noreturn rest_init(void)
{
// ...
/* Start kernel_init() in a new thread created with CLONE_FS. */
pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
// ...
}
/*
 * Weak default implementation that simply calls rest_init().
 */
void __init __weak __noreturn arch_call_rest_init(void)
{
rest_init();
}
/*
 * Abridged excerpt: after waiting for the initramfs and setting up the
 * console, decide whether an early userspace init exists. If the
 * init_eaccess() check on ramdisk_execute_command fails, the command is
 * cleared and prepare_namespace() is called to set up the root instead.
 * The "// ..." markers stand for omitted code.
 */
static noinline void __init kernel_init_freeable(void)
{
// ...
wait_for_initramfs();
console_on_rootfs();
/*
* check if there is an early userspace init. If yes, let it do all
* the work
*/
if (init_eaccess(ramdisk_execute_command) != 0) {
/* Clearing the command makes kernel_init() skip the ramdisk init attempt. */
ramdisk_execute_command = NULL;
prepare_namespace();
}
// ...
}
/*
 * Thread function started from rest_init(): finish the remaining kernel
 * initialisation, free init memory, and then try to run the first program.
 *
 * Candidates are tried in this order:
 *  1. ramdisk_execute_command, if set (a failure is logged, then continue);
 *  2. execute_command, if set (a failure panics);
 *  3. CONFIG_DEFAULT_INIT, if non-empty (a failure is logged, then continue);
 *  4. /sbin/init, /etc/init, /bin/init, /bin/sh.
 *
 * Returns 0 as soon as one candidate was started successfully; panics when
 * none works.
 */
static int __ref kernel_init(void *unused)
{
int ret;
/*
* Wait until kthreadd is all set-up.
*/
wait_for_completion(&kthreadd_done);
kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
system_state = SYSTEM_FREEING_INITMEM;
kprobe_free_init_mem();
ftrace_free_init_mem();
kgdb_free_init_mem();
exit_boot_config();
free_initmem();
mark_readonly();
/*
* Kernel mappings are now finalized - update the userspace page-table
* to finalize PTI.
*/
pti_finalize();
system_state = SYSTEM_RUNNING;
numa_default_policy();
rcu_end_inkernel_boot();
do_sysctl_args();
/* First choice: the init program from the ramdisk, if one is still set. */
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d)\n",
ramdisk_execute_command, ret);
}
/*
* We try each of these until one succeeds.
*
* The Bourne shell can be used instead of init if we are
* trying to recover a really broken machine.
*/
/* An explicitly requested init must work; there is no fallback after it. */
if (execute_command) {
ret = run_init_process(execute_command);
if (!ret)
return 0;
panic("Requested init %s failed (error %d).",
execute_command, ret);
}
/* Build-time default init, used only when the config string is non-empty. */
if (CONFIG_DEFAULT_INIT[0] != '\0') {
ret = run_init_process(CONFIG_DEFAULT_INIT);
if (ret)
pr_err("Default init %s failed (error %d)\n",
CONFIG_DEFAULT_INIT, ret);
else
return 0;
}
/* Fixed fallbacks; a zero return from the helper means the program was started. */
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
!try_to_run_init_process("/bin/init") ||
!try_to_run_init_process("/bin/sh"))
return 0;
panic("No working init found. Try passing init= option to kernel. "
"See Linux Documentation/admin-guide/init.rst for guidance.");
}
start_kernel
-> vfs_caches_init
-> mnt_init
-> init_rootfs
-> init_mount_tree
-> vfs_kern_mount 【初始化rootfs:mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);】
-> 在内核挂载了rootfs后,会将当前进程的pwd和root目录设置为rootfs的根目录。
-> arch_call_rest_init -> rest_init -> user_mode_thread:kernel_init
-> prepare_namespace
-> mount_root
-> mount_block_root 【创建/dev/root】
-> mount_root_generic 【将/dev/root挂载到/root】
-> do_mount_root 【根据内核所有fs的名字试着挂载/dev/root到/root】
-> init_chdir 【使用init_chdir将进程的当前目录设置为/root】
-> devtmpfs_mount
-> init_mount
-> init_chroot