X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=init%2Fmain.c;h=b0a3813e222bc5ac537c4e0ad8c6c577aafb903f;hb=refs%2Fheads%2Fvserver;hp=6849397dd475fbf9942406dfe5a0f6de3817b13c;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/init/main.c b/init/main.c index 6849397dd..b0a3813e2 100644 --- a/init/main.c +++ b/init/main.c @@ -9,13 +9,9 @@ * Simplified starting of init: Michael A. Griffith */ -#define __KERNEL_SYSCALLS__ - -#include #include #include #include -#include #include #include #include @@ -33,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -41,63 +38,71 @@ #include #include #include +#include #include +#include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include - -/* - * This is one of the first .c files built. Error out early - * if we have compiler trouble.. - */ -#if __GNUC__ == 2 && __GNUC_MINOR__ == 96 -#ifdef CONFIG_FRAME_POINTER -#error This compiler cannot compile correctly with frame pointers enabled -#endif -#endif +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif /* - * Versions of gcc older than that listed below may actually compile - * and link okay, but the end product can have subtle run time bugs. - * To avoid associated bogus bug reports, we flatly refuse to compile - * with a gcc that is known to be too old from the very beginning. + * This is one of the first .c files built. Error out early if we have compiler + * trouble. + * + * Versions of gcc older than that listed below may actually compile and link + * okay, but the end product can have subtle run time bugs. To avoid associated + * bogus bug reports, we flatly refuse to compile with a gcc that is known to be + * too old from the very beginning. */ -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) +#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2) #error Sorry, your GCC is too old. 
It builds incorrect kernels. #endif +#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0 +#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended. +#endif + static int init(void *); extern void init_IRQ(void); -extern void sock_init(void); extern void fork_init(unsigned long); extern void mca_init(void); extern void sbus_init(void); extern void sysctl_init(void); extern void signals_init(void); -extern void buffer_init(void); extern void pidhash_init(void); extern void pidmap_init(void); extern void prio_tree_init(void); extern void radix_tree_init(void); extern void free_initmem(void); -extern void populate_rootfs(void); -extern void driver_init(void); extern void prepare_namespace(void); #ifdef CONFIG_ACPI extern void acpi_early_init(void); #else static inline void acpi_early_init(void) { } #endif +#ifndef CONFIG_DEBUG_RODATA +static inline void mark_rodata_ro(void) { } +#endif #ifdef CONFIG_TC extern void tc_init(void); @@ -109,8 +114,8 @@ EXPORT_SYMBOL(system_state); /* * Boot command-line arguments */ -#define MAX_INIT_ARGS 32 -#define MAX_INIT_ENVS 32 +#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT +#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT extern void time_init(void); /* Default late time init is NULL. archs can override this later. */ @@ -121,10 +126,23 @@ extern void softirq_init(void); char saved_command_line[COMMAND_LINE_SIZE]; static char *execute_command; +static char *ramdisk_execute_command; /* Setup configured maximum number of CPUs to activate */ static unsigned int max_cpus = NR_CPUS; +/* + * If set, this is an indication to the drivers that reset the underlying + * device before going ahead with the initialization otherwise driver might + * rely on the BIOS and skip the reset operation. + * + * This is useful if kernel is booting in an unreliable environment. + * For ex. 
kdump situation where previous kernel has crashed, BIOS has been + * skipped and devices will be in unknown state. + */ +unsigned int reset_devices; +EXPORT_SYMBOL(reset_devices); + /* * Setup routine for controlling SMP activation * @@ -151,6 +169,14 @@ static int __init maxcpus(char *str) __setup("maxcpus=", maxcpus); +static int __init set_reset_devices(char *str) +{ + reset_devices = 1; + return 1; +} + +__setup("reset_devices", set_reset_devices); + static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; @@ -160,16 +186,19 @@ extern struct obs_kernel_param __setup_start[], __setup_end[]; static int __init obsolete_checksetup(char *line) { struct obs_kernel_param *p; + int had_early_param = 0; p = __setup_start; do { int n = strlen(p->str); if (!strncmp(line, p->str, n)) { if (p->early) { - /* Already done in parse_early_param? (Needs - * exact match on param part) */ + /* Already done in parse_early_param? + * (Needs exact match on param part). 
+ * Keep iterating, as we can have early + * params and __setups of same names 8( */ if (line[n] == '\0' || line[n] == '=') - return 1; + had_early_param = 1; } else if (!p->setup_func) { printk(KERN_WARNING "Parameter %s is obsolete," " ignored\n", p->str); @@ -179,7 +208,8 @@ static int __init obsolete_checksetup(char *line) } p++; } while (p < __setup_end); - return 0; + + return had_early_param; } /* @@ -209,6 +239,14 @@ static int __init quiet_kernel(char *str) __setup("debug", debug_kernel); __setup("quiet", quiet_kernel); +static int __init loglevel(char *str) +{ + get_option(&str, &console_loglevel); + return 1; +} + +__setup("loglevel=", loglevel); + /* * Unknown boot options get handed to init, unless they look like * failed parameters @@ -287,7 +325,17 @@ static int __init init_setup(char *str) } __setup("init=", init_setup); -extern void setup_arch(char **); +static int __init rdinit_setup(char *str) +{ + unsigned int i; + + ramdisk_execute_command = str; + /* See "auto" comment in init_setup */ + for (i = 1; i < MAX_INIT_ARGS; i++) + argv_init[i] = NULL; + return 1; +} +__setup("rdinit=", rdinit_setup); #ifndef CONFIG_SMP @@ -306,16 +354,15 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } #else #ifdef __GENERIC_PER_CPU -unsigned long __per_cpu_offset[NR_CPUS]; +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); static void __init setup_per_cpu_areas(void) { - unsigned long size, i; + unsigned long size, vspc, i; char *ptr; - /* Created by linker magic */ - extern char __per_cpu_start[], __per_cpu_end[]; + unsigned long nr_possible_cpus = num_possible_cpus(); /* Copy section for each CPU (we discard the original) */ size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); @@ -323,16 +370,22 @@ static void __init setup_per_cpu_areas(void) if (size < PERCPU_ENOUGH_ROOM) size = PERCPU_ENOUGH_ROOM; #endif + vspc = PERCPU_PERCTX * CONFIG_VSERVER_CONTEXTS; + size = ALIGN(size + vspc, 
SMP_CACHE_BYTES); + ptr = alloc_bootmem(size * nr_possible_cpus); - ptr = alloc_bootmem(size * NR_CPUS); - - for (i = 0; i < NR_CPUS; i++, ptr += size) { + for_each_possible_cpu(i) { __per_cpu_offset[i] = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + ptr += size; } } #endif /* !__GENERIC_PER_CPU */ +#include +#include +#include + /* Called by boot processor to activate the rest. */ static void __init smp_init(void) { @@ -347,14 +400,22 @@ static void __init smp_init(void) } /* Any cleanup work */ - printk("Brought up %ld CPUs\n", (long)num_online_cpus()); + printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); smp_cpus_done(max_cpus); #if 0 /* Get other processors into their bootup holding patterns. */ - smp_threads_ready=1; smp_commence(); #endif + + printk(KERN_DEBUG "sizeof(vma)=%u bytes\n", (unsigned int) sizeof(struct vm_area_struct)); + printk(KERN_DEBUG "sizeof(page)=%u bytes\n", (unsigned int) sizeof(struct page)); + printk(KERN_DEBUG "sizeof(inode)=%u bytes\n", (unsigned int) sizeof(struct inode)); + printk(KERN_DEBUG "sizeof(dentry)=%u bytes\n", (unsigned int) sizeof(struct dentry)); + printk(KERN_DEBUG "sizeof(ext3inode)=%u bytes\n", (unsigned int) sizeof(struct ext3_inode_info)); + printk(KERN_DEBUG "sizeof(buffer_head)=%u bytes\n", (unsigned int) sizeof(struct buffer_head)); + printk(KERN_DEBUG "sizeof(skbuff)=%u bytes\n", (unsigned int) sizeof(struct sk_buff)); + printk(KERN_DEBUG "sizeof(task_struct)=%u bytes\n", (unsigned int) sizeof(struct task_struct)); } #endif @@ -374,7 +435,16 @@ static void noinline rest_init(void) kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); unlock_kernel(); + + /* + * The boot idle thread must execute schedule() + * at least once to get things moving: + */ preempt_enable_no_resched(); + schedule(); + preempt_disable(); + + /* Call into cpu_idle with preempt disabled */ cpu_idle(); } @@ -413,25 +483,50 @@ void __init parse_early_param(void) 
* Activate the first processor. */ +static void __init boot_cpu_init(void) +{ + int cpu = smp_processor_id(); + /* Mark the boot cpu "present", "online" etc for SMP and UP case */ + cpu_set(cpu, cpu_online_map); + cpu_set(cpu, cpu_present_map); + cpu_set(cpu, cpu_possible_map); +} + +void __init __attribute__((weak)) smp_setup_processor_id(void) +{ +} + asmlinkage void __init start_kernel(void) { char * command_line; extern struct kernel_param __start___param[], __stop___param[]; + + smp_setup_processor_id(); + + /* + * Need to run as early as possible, to initialize the + * lockdep hash: + */ + unwind_init(); + lockdep_init(); + + local_irq_disable(); + early_boot_irqs_off(); + early_init_irq_lock_class(); + /* * Interrupts are still disabled. Do necessary setups, then * enable them */ lock_kernel(); + boot_cpu_init(); page_address_init(); + printk(KERN_NOTICE); printk(linux_banner); setup_arch(&command_line); + unwind_setup(); setup_per_cpu_areas(); - - /* - * Mark the boot cpu "online" so that it can call console drivers in - * printk() and can access its per-cpu storage. 
- */ - smp_prepare_boot_cpu(); + smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ /* * Set up the scheduler prior starting any interrupts (such as the @@ -446,19 +541,31 @@ asmlinkage void __init start_kernel(void) preempt_disable(); build_all_zonelists(); page_alloc_init(); - printk("Kernel command line: %s\n", saved_command_line); + printk(KERN_NOTICE "Kernel command line: %s\n", saved_command_line); parse_early_param(); parse_args("Booting kernel", command_line, __start___param, __stop___param - __start___param, &unknown_bootoption); + if (!irqs_disabled()) { + printk(KERN_WARNING "start_kernel(): bug: interrupts were " + "enabled *very* early, fixing it\n"); + local_irq_disable(); + } sort_main_extable(); trap_init(); rcu_init(); init_IRQ(); pidhash_init(); init_timers(); + hrtimers_init(); softirq_init(); + timekeeping_init(); time_init(); + profile_init(); + if (!irqs_disabled()) + printk("start_kernel(): bug: interrupts were enabled early\n"); + early_boot_irqs_on(); + local_irq_enable(); /* * HACK ALERT! This is early. 
We're enabling the console before @@ -468,8 +575,16 @@ asmlinkage void __init start_kernel(void) console_init(); if (panic_later) panic(panic_later, panic_param); - profile_init(); - local_irq_enable(); + + lockdep_info(); + + /* + * Need to run this when irqs are enabled, because it wants + * to self-test [hard/soft]-irqs on/off lock inversion bugs + * too: + */ + locking_selftest(); + #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && initrd_start < min_low_pfn << PAGE_SHIFT) { @@ -479,8 +594,10 @@ asmlinkage void __init start_kernel(void) } #endif vfs_caches_init_early(); + cpuset_init_early(); mem_init(); kmem_cache_init(); + setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) late_time_init(); @@ -497,6 +614,7 @@ asmlinkage void __init start_kernel(void) proc_caches_init(); buffer_init(); unnamed_dev_init(); + key_init(); security_init(); vfs_caches_init(num_physpages); radix_tree_init(); @@ -506,6 +624,10 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_PROC_FS proc_root_init(); #endif + cpuset_init(); + taskstats_init_early(); + delayacct_init(); + check_bugs(); acpi_early_init(); /* before LAPIC and SMP init */ @@ -523,8 +645,6 @@ static int __init initcall_debug_setup(char *str) } __setup("initcall_debug", initcall_debug_setup); -struct task_struct *child_reaper = &init_task; - extern initcall_t __initcall_start[], __initcall_end[]; static void __init do_initcalls(void) @@ -533,17 +653,23 @@ static void __init do_initcalls(void) int count = preempt_count(); for (call = __initcall_start; call < __initcall_end; call++) { - char *msg; + char *msg = NULL; + char msgbuf[40]; + int result; if (initcall_debug) { - printk(KERN_DEBUG "Calling initcall 0x%p", *call); - print_fn_descriptor_symbol(": %s()", (unsigned long) *call); + printk("Calling initcall 0x%p", *call); + print_fn_descriptor_symbol(": %s()", + (unsigned long) *call); printk("\n"); } - (*call)(); + result = (*call)(); - msg = NULL; + if (result && result 
!= -ENODEV && initcall_debug) { + sprintf(msgbuf, "error code %d", result); + msg = msgbuf; + } if (preempt_count() != count) { msg = "preemption imbalance"; preempt_count() = count; @@ -553,8 +679,10 @@ static void __init do_initcalls(void) local_irq_enable(); } if (msg) { - printk("error in initcall at 0x%p: " - "returned with %s\n", *call, msg); + printk(KERN_WARNING "initcall at 0x%p", *call); + print_fn_descriptor_symbol(": %s()", + (unsigned long) *call); + printk(": returned with %s\n", msg); } } @@ -562,6 +690,30 @@ static void __init do_initcalls(void) flush_scheduled_work(); } +#ifdef CONFIG_BOOT_DELAY + +unsigned int boot_delay = 0; /* msecs delay after each printk during bootup */ +extern long preset_lpj; +unsigned long long printk_delay_msec = 0; /* per msec, based on boot_delay */ + +static int __init boot_delay_setup(char *str) +{ + unsigned long lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */ + unsigned long long loops_per_msec = lpj / 1000 * CONFIG_HZ; + + get_option(&str, &boot_delay); + if (boot_delay > 10 * 1000) + boot_delay = 0; + + printk_delay_msec = loops_per_msec; + printk("boot_delay: %u, preset_lpj: %ld, lpj: %lu, CONFIG_HZ: %d, printk_delay_msec: %llu\n", + boot_delay, preset_lpj, lpj, CONFIG_HZ, printk_delay_msec); + return 1; +} +__setup("boot_delay=", boot_delay_setup); + +#endif + /* * Ok, the machine is now initialized. 
None of the devices * have been touched yet, but the CPU subsystem is up and @@ -574,20 +726,25 @@ static void __init do_basic_setup(void) /* drivers will send hotplug events */ init_workqueues(); usermodehelper_init(); - key_init(); driver_init(); #ifdef CONFIG_SYSCTL sysctl_init(); #endif - /* Networking initialization needs a process context */ - sock_init(); - do_initcalls(); } -static void do_pre_smp_initcalls(void) +static int __initdata nosoftlockup; + +static int __init nosoftlockup_setup(char *str) +{ + nosoftlockup = 1; + return 1; +} +__setup("nosoftlockup", nosoftlockup_setup); + +static void __init do_pre_smp_initcalls(void) { extern int spawn_ksoftirqd(void); #ifdef CONFIG_SMP @@ -596,35 +753,23 @@ static void do_pre_smp_initcalls(void) migration_init(); #endif spawn_ksoftirqd(); + if (!nosoftlockup) + spawn_softlockup_task(); } static void run_init_process(char *init_filename) { argv_init[0] = init_filename; - execve(init_filename, argv_init, envp_init); -} - -static inline void fixup_cpu_present_map(void) -{ -#ifdef CONFIG_SMP - int i; - - /* - * If arch is not hotplug ready and did not populate - * cpu_present_map, just make cpu_present_map same as cpu_possible_map - * for other cpu bringup code to function as normal. e.g smp_init() etc. - */ - if (cpus_empty(cpu_present_map)) { - for_each_cpu(i) { - cpu_set(i, cpu_present_map); - } - } -#endif + kernel_execve(init_filename, argv_init, envp_init); } static int init(void * unused) { lock_kernel(); + /* + * init can run on any cpu. + */ + set_cpus_allowed(current, CPU_MASK_ALL); /* * Tell the world that we're going to be the grim * reaper of innocent orphaned children. @@ -633,22 +778,18 @@ static int init(void * unused) * assumptions about where in the task array this * can be found. 
*/ - child_reaper = current; + init_pid_ns.child_reaper = current; + + cad_pid = task_pid(current); - /* Sets up cpus_possible() */ smp_prepare_cpus(max_cpus); do_pre_smp_initcalls(); - fixup_cpu_present_map(); smp_init(); sched_init_smp(); - /* - * Do this before initcalls, because some drivers want to access - * firmware files. - */ - populate_rootfs(); + cpuset_init_smp(); do_basic_setup(); @@ -656,10 +797,14 @@ static int init(void * unused) * check if there is an early userspace init. If yes, let it do all * the work */ - if (sys_access((const char __user *) "/init", 0) == 0) - execute_command = "/init"; - else + + if (!ramdisk_execute_command) + ramdisk_execute_command = "/init"; + + if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) { + ramdisk_execute_command = NULL; prepare_namespace(); + } /* * Ok, we have completed the initial bootup, and @@ -668,25 +813,33 @@ static int init(void * unused) */ free_initmem(); unlock_kernel(); + mark_rodata_ro(); system_state = SYSTEM_RUNNING; numa_default_policy(); if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - printk("Warning: unable to open an initial console.\n"); + printk(KERN_WARNING "Warning: unable to open an initial console.\n"); (void) sys_dup(0); (void) sys_dup(0); - + + if (ramdisk_execute_command) { + run_init_process(ramdisk_execute_command); + printk(KERN_WARNING "Failed to execute %s\n", + ramdisk_execute_command); + } + /* * We try each of these until one succeeds. * * The Bourne shell can be used instead of init if we are * trying to recover a really broken machine. */ - - if (execute_command) + if (execute_command) { run_init_process(execute_command); - + printk(KERN_WARNING "Failed to execute %s. Attempting " + "defaults...\n", execute_command); + } run_init_process("/sbin/init"); run_init_process("/etc/init"); run_init_process("/bin/init");