X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=init%2Fmain.c;h=b0a3813e222bc5ac537c4e0ad8c6c577aafb903f;hb=a2f44b27303a5353859d77a3e96a1d3f33f56ab7;hp=4efd7b84f9e6748ec143397bcd69421594220aaa;hpb=f934759a50056d3162a39713173d9c8064a2d6dd;p=linux-2.6.git diff --git a/init/main.c b/init/main.c index 4efd7b84f..b0a3813e2 100644 --- a/init/main.c +++ b/init/main.c @@ -9,13 +9,9 @@ * Simplified starting of init: Michael A. Griffith */ -#define __KERNEL_SYSCALLS__ - -#include #include #include #include -#include #include #include #include @@ -33,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -41,67 +38,71 @@ #include #include #include +#include #include +#include +#include #include #include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include - -/* - * This is one of the first .c files built. Error out early - * if we have compiler trouble.. - */ -#if __GNUC__ == 2 && __GNUC_MINOR__ == 96 -#ifdef CONFIG_FRAME_POINTER -#error This compiler cannot compile correctly with frame pointers enabled -#endif -#endif +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif /* - * Versions of gcc older than that listed below may actually compile - * and link okay, but the end product can have subtle run time bugs. - * To avoid associated bogus bug reports, we flatly refuse to compile - * with a gcc that is known to be too old from the very beginning. + * This is one of the first .c files built. Error out early if we have compiler + * trouble. + * + * Versions of gcc older than that listed below may actually compile and link + * okay, but the end product can have subtle run time bugs. To avoid associated + * bogus bug reports, we flatly refuse to compile with a gcc that is known to be + * too old from the very beginning. 
*/ -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) +#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2) #error Sorry, your GCC is too old. It builds incorrect kernels. #endif -extern char *linux_banner; +#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0 +#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended. +#endif static int init(void *); extern void init_IRQ(void); -extern void sock_init(void); extern void fork_init(unsigned long); extern void mca_init(void); extern void sbus_init(void); extern void sysctl_init(void); extern void signals_init(void); -extern void buffer_init(void); extern void pidhash_init(void); extern void pidmap_init(void); extern void prio_tree_init(void); extern void radix_tree_init(void); extern void free_initmem(void); -extern void populate_rootfs(void); -extern void driver_init(void); extern void prepare_namespace(void); #ifdef CONFIG_ACPI extern void acpi_early_init(void); #else static inline void acpi_early_init(void) { } #endif +#ifndef CONFIG_DEBUG_RODATA +static inline void mark_rodata_ro(void) { } +#endif #ifdef CONFIG_TC extern void tc_init(void); @@ -110,21 +111,11 @@ extern void tc_init(void); enum system_states system_state; EXPORT_SYMBOL(system_state); -/* - * The kernel_magic value represents the address of _end, which allows - * namelist tools to "match" each other respectively. That way a tool - * that looks at /dev/mem can verify that it is using the right System.map - * file -- if kernel_magic doesn't equal the namelist value of _end, - * something's wrong. - */ -extern unsigned long _end; -unsigned long *kernel_magic = &_end; - /* * Boot command-line arguments */ -#define MAX_INIT_ARGS 32 -#define MAX_INIT_ENVS 32 +#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT +#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT extern void time_init(void); /* Default late time init is NULL. archs can override this later. 
*/ @@ -135,10 +126,23 @@ extern void softirq_init(void); char saved_command_line[COMMAND_LINE_SIZE]; static char *execute_command; +static char *ramdisk_execute_command; /* Setup configured maximum number of CPUs to activate */ static unsigned int max_cpus = NR_CPUS; +/* + * If set, this is an indication to the drivers that reset the underlying + * device before going ahead with the initialization otherwise driver might + * rely on the BIOS and skip the reset operation. + * + * This is useful if kernel is booting in an unreliable environment. + * For ex. kdump situation where previous kernel has crashed, BIOS has been + * skipped and devices will be in unknown state. + */ +unsigned int reset_devices; +EXPORT_SYMBOL(reset_devices); + /* * Setup routine for controlling SMP activation * @@ -165,24 +169,36 @@ static int __init maxcpus(char *str) __setup("maxcpus=", maxcpus); +static int __init set_reset_devices(char *str) +{ + reset_devices = 1; + return 1; +} + +__setup("reset_devices", set_reset_devices); + static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; +extern struct obs_kernel_param __setup_start[], __setup_end[]; + static int __init obsolete_checksetup(char *line) { struct obs_kernel_param *p; - extern struct obs_kernel_param __setup_start, __setup_end; + int had_early_param = 0; - p = &__setup_start; + p = __setup_start; do { int n = strlen(p->str); if (!strncmp(line, p->str, n)) { if (p->early) { - /* Already done in parse_early_param? (Needs - * exact match on param part) */ + /* Already done in parse_early_param? + * (Needs exact match on param part).
+ * Keep iterating, as we can have early + * params and __setups of same names 8( */ if (line[n] == '\0' || line[n] == '=') - return 1; + had_early_param = 1; } else if (!p->setup_func) { printk(KERN_WARNING "Parameter %s is obsolete," " ignored\n", p->str); @@ -191,19 +207,11 @@ static int __init obsolete_checksetup(char *line) return 1; } p++; - } while (p < &__setup_end); - return 0; -} + } while (p < __setup_end); -static unsigned long preset_lpj; -static int __init lpj_setup(char *str) -{ - preset_lpj = simple_strtoul(str,NULL,0); - return 1; + return had_early_param; } -__setup("lpj=", lpj_setup); - /* * This should be approx 2 Bo*oMips to start (note initial shift), and will * still work even if initially too large, it will just take slightly longer @@ -212,67 +220,6 @@ unsigned long loops_per_jiffy = (1<<12); EXPORT_SYMBOL(loops_per_jiffy); -/* - * This is the number of bits of precision for the loops_per_jiffy. Each - * bit takes on average 1.5/HZ seconds. This (like the original) is a little - * better than 1% - */ -#define LPS_PREC 8 - -void __devinit calibrate_delay(void) -{ - unsigned long ticks, loopbit; - int lps_precision = LPS_PREC; - - if (preset_lpj) { - loops_per_jiffy = preset_lpj; - printk("Calibrating delay loop (skipped)... " - "%lu.%02lu BogoMIPS preset\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100); - } else { - loops_per_jiffy = (1<<12); - - printk(KERN_DEBUG "Calibrating delay loop... "); - while ((loops_per_jiffy <<= 1) != 0) { - /* wait for "start of" clock tick */ - ticks = jiffies; - while (ticks == jiffies) - /* nothing */; - /* Go .. 
*/ - ticks = jiffies; - __delay(loops_per_jiffy); - ticks = jiffies - ticks; - if (ticks) - break; - } - - /* - * Do a binary approximation to get loops_per_jiffy set to - * equal one clock (up to lps_precision bits) - */ - loops_per_jiffy >>= 1; - loopbit = loops_per_jiffy; - while (lps_precision-- && (loopbit >>= 1)) { - loops_per_jiffy |= loopbit; - ticks = jiffies; - while (ticks == jiffies) - /* nothing */; - ticks = jiffies; - __delay(loops_per_jiffy); - if (jiffies != ticks) /* longer than 1 tick */ - loops_per_jiffy &= ~loopbit; - } - - /* Round the value and print it */ - printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, - loops_per_jiffy); - } - -} - static int __init debug_kernel(char *str) { if (*str) @@ -292,6 +239,14 @@ static int __init quiet_kernel(char *str) __setup("debug", debug_kernel); __setup("quiet", quiet_kernel); +static int __init loglevel(char *str) +{ + get_option(&str, &console_loglevel); + return 1; +} + +__setup("loglevel=", loglevel); + /* * Unknown boot options get handed to init, unless they look like * failed parameters @@ -370,8 +325,17 @@ static int __init init_setup(char *str) } __setup("init=", init_setup); -extern void setup_arch(char **); -extern void cpu_idle(void); +static int __init rdinit_setup(char *str) +{ + unsigned int i; + + ramdisk_execute_command = str; + /* See "auto" comment in init_setup */ + for (i = 1; i < MAX_INIT_ARGS; i++) + argv_init[i] = NULL; + return 1; +} +__setup("rdinit=", rdinit_setup); #ifndef CONFIG_SMP @@ -390,16 +354,15 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } #else #ifdef __GENERIC_PER_CPU -unsigned long __per_cpu_offset[NR_CPUS]; +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); static void __init setup_per_cpu_areas(void) { - unsigned long size, i; + unsigned long size, vspc, i; char *ptr; - /* Created by linker magic */ - extern char __per_cpu_start[], __per_cpu_end[]; + 
unsigned long nr_possible_cpus = num_possible_cpus(); /* Copy section for each CPU (we discard the original) */ size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); @@ -407,16 +370,22 @@ static void __init setup_per_cpu_areas(void) if (size < PERCPU_ENOUGH_ROOM) size = PERCPU_ENOUGH_ROOM; #endif + vspc = PERCPU_PERCTX * CONFIG_VSERVER_CONTEXTS; + size = ALIGN(size + vspc, SMP_CACHE_BYTES); + ptr = alloc_bootmem(size * nr_possible_cpus); - ptr = alloc_bootmem(size * NR_CPUS); - - for (i = 0; i < NR_CPUS; i++, ptr += size) { + for_each_possible_cpu(i) { __per_cpu_offset[i] = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + ptr += size; } } #endif /* !__GENERIC_PER_CPU */ +#include +#include +#include + /* Called by boot processor to activate the rest. */ static void __init smp_init(void) { @@ -431,14 +400,22 @@ static void __init smp_init(void) } /* Any cleanup work */ - printk("Brought up %ld CPUs\n", (long)num_online_cpus()); + printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); smp_cpus_done(max_cpus); #if 0 /* Get other processors into their bootup holding patterns. 
*/ - smp_threads_ready=1; smp_commence(); #endif + + printk(KERN_DEBUG "sizeof(vma)=%u bytes\n", (unsigned int) sizeof(struct vm_area_struct)); + printk(KERN_DEBUG "sizeof(page)=%u bytes\n", (unsigned int) sizeof(struct page)); + printk(KERN_DEBUG "sizeof(inode)=%u bytes\n", (unsigned int) sizeof(struct inode)); + printk(KERN_DEBUG "sizeof(dentry)=%u bytes\n", (unsigned int) sizeof(struct dentry)); + printk(KERN_DEBUG "sizeof(ext3inode)=%u bytes\n", (unsigned int) sizeof(struct ext3_inode_info)); + printk(KERN_DEBUG "sizeof(buffer_head)=%u bytes\n", (unsigned int) sizeof(struct buffer_head)); + printk(KERN_DEBUG "sizeof(skbuff)=%u bytes\n", (unsigned int) sizeof(struct sk_buff)); + printk(KERN_DEBUG "sizeof(task_struct)=%u bytes\n", (unsigned int) sizeof(struct task_struct)); } #endif @@ -458,16 +435,25 @@ static void noinline rest_init(void) kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); unlock_kernel(); - cpu_idle(); + + /* + * The boot idle thread must execute schedule() + * at least once to get things moving: + */ + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + + /* Call into cpu_idle with preempt disabled */ + cpu_idle(); } /* Check for early params. */ static int __init do_early_param(char *param, char *val) { struct obs_kernel_param *p; - extern struct obs_kernel_param __setup_start, __setup_end; - for (p = &__setup_start; p < &__setup_end; p++) { + for (p = __setup_start; p < __setup_end; p++) { if (p->early && strcmp(param, p->str) == 0) { if (p->setup_func(val) != 0) printk(KERN_WARNING @@ -497,25 +483,50 @@ void __init parse_early_param(void) * Activate the first processor.
*/ +static void __init boot_cpu_init(void) +{ + int cpu = smp_processor_id(); + /* Mark the boot cpu "present", "online" etc for SMP and UP case */ + cpu_set(cpu, cpu_online_map); + cpu_set(cpu, cpu_present_map); + cpu_set(cpu, cpu_possible_map); +} + +void __init __attribute__((weak)) smp_setup_processor_id(void) +{ +} + asmlinkage void __init start_kernel(void) { char * command_line; extern struct kernel_param __start___param[], __stop___param[]; + + smp_setup_processor_id(); + + /* + * Need to run as early as possible, to initialize the + * lockdep hash: + */ + unwind_init(); + lockdep_init(); + + local_irq_disable(); + early_boot_irqs_off(); + early_init_irq_lock_class(); + /* * Interrupts are still disabled. Do necessary setups, then * enable them */ lock_kernel(); + boot_cpu_init(); page_address_init(); + printk(KERN_NOTICE); printk(linux_banner); setup_arch(&command_line); + unwind_setup(); setup_per_cpu_areas(); - - /* - * Mark the boot cpu "online" so that it can call console drivers in - * printk() and can access its per-cpu storage. - */ - smp_prepare_boot_cpu(); + smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ /* * Set up the scheduler prior starting any interrupts (such as the @@ -523,22 +534,38 @@ asmlinkage void __init start_kernel(void) * time - but meanwhile we still have a functioning scheduler. */ sched_init(); + /* + * Disable preemption - early bootup scheduling is extremely + * fragile until we cpu_idle() for the first time. 
+ */ + preempt_disable(); build_all_zonelists(); page_alloc_init(); - printk("Kernel command line: %s\n", saved_command_line); + printk(KERN_NOTICE "Kernel command line: %s\n", saved_command_line); parse_early_param(); parse_args("Booting kernel", command_line, __start___param, __stop___param - __start___param, &unknown_bootoption); + if (!irqs_disabled()) { + printk(KERN_WARNING "start_kernel(): bug: interrupts were " + "enabled *very* early, fixing it\n"); + local_irq_disable(); + } sort_main_extable(); trap_init(); rcu_init(); init_IRQ(); pidhash_init(); - ckrm_init(); init_timers(); + hrtimers_init(); softirq_init(); + timekeeping_init(); time_init(); + profile_init(); + if (!irqs_disabled()) + printk("start_kernel(): bug: interrupts were enabled early\n"); + early_boot_irqs_on(); + local_irq_enable(); /* * HACK ALERT! This is early. We're enabling the console before @@ -548,8 +575,16 @@ asmlinkage void __init start_kernel(void) console_init(); if (panic_later) panic(panic_later, panic_param); - profile_init(); - local_irq_enable(); + + lockdep_info(); + + /* + * Need to run this when irqs are enabled, because it wants + * to self-test [hard/soft]-irqs on/off lock inversion bugs + * too: + */ + locking_selftest(); + #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && initrd_start < min_low_pfn << PAGE_SHIFT) { @@ -559,8 +594,10 @@ asmlinkage void __init start_kernel(void) } #endif vfs_caches_init_early(); + cpuset_init_early(); mem_init(); kmem_cache_init(); + setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) late_time_init(); @@ -577,6 +614,7 @@ asmlinkage void __init start_kernel(void) proc_caches_init(); buffer_init(); unnamed_dev_init(); + key_init(); security_init(); vfs_caches_init(num_physpages); radix_tree_init(); @@ -586,6 +624,10 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_PROC_FS proc_root_init(); #endif + cpuset_init(); + taskstats_init_early(); + delayacct_init(); + check_bugs(); 
acpi_early_init(); /* before LAPIC and SMP init */ @@ -603,27 +645,31 @@ static int __init initcall_debug_setup(char *str) } __setup("initcall_debug", initcall_debug_setup); -struct task_struct *child_reaper = &init_task; - -extern initcall_t __initcall_start, __initcall_end; +extern initcall_t __initcall_start[], __initcall_end[]; static void __init do_initcalls(void) { initcall_t *call; int count = preempt_count(); - for (call = &__initcall_start; call < &__initcall_end; call++) { - char *msg; + for (call = __initcall_start; call < __initcall_end; call++) { + char *msg = NULL; + char msgbuf[40]; + int result; if (initcall_debug) { - printk(KERN_DEBUG "Calling initcall 0x%p", *call); - print_fn_descriptor_symbol(": %s()", (unsigned long) *call); + printk("Calling initcall 0x%p", *call); + print_fn_descriptor_symbol(": %s()", + (unsigned long) *call); printk("\n"); } - (*call)(); + result = (*call)(); - msg = NULL; + if (result && result != -ENODEV && initcall_debug) { + sprintf(msgbuf, "error code %d", result); + msg = msgbuf; + } if (preempt_count() != count) { msg = "preemption imbalance"; preempt_count() = count; @@ -633,8 +679,10 @@ static void __init do_initcalls(void) local_irq_enable(); } if (msg) { - printk("error in initcall at 0x%p: " - "returned with %s\n", *call, msg); + printk(KERN_WARNING "initcall at 0x%p", *call); + print_fn_descriptor_symbol(": %s()", + (unsigned long) *call); + printk(": returned with %s\n", msg); } } @@ -642,6 +690,30 @@ static void __init do_initcalls(void) flush_scheduled_work(); } +#ifdef CONFIG_BOOT_DELAY + +unsigned int boot_delay = 0; /* msecs delay after each printk during bootup */ +extern long preset_lpj; +unsigned long long printk_delay_msec = 0; /* per msec, based on boot_delay */ + +static int __init boot_delay_setup(char *str) +{ + unsigned long lpj = preset_lpj ? 
preset_lpj : 1000000; /* some guess */ + unsigned long long loops_per_msec = lpj / 1000 * CONFIG_HZ; + + get_option(&str, &boot_delay); + if (boot_delay > 10 * 1000) + boot_delay = 0; + + printk_delay_msec = loops_per_msec; + printk("boot_delay: %u, preset_lpj: %ld, lpj: %lu, CONFIG_HZ: %d, printk_delay_msec: %llu\n", + boot_delay, preset_lpj, lpj, CONFIG_HZ, printk_delay_msec); + return 1; +} +__setup("boot_delay=", boot_delay_setup); + +#endif + /* * Ok, the machine is now initialized. None of the devices * have been touched yet, but the CPU subsystem is up and @@ -654,20 +726,25 @@ static void __init do_basic_setup(void) /* drivers will send hotplug events */ init_workqueues(); usermodehelper_init(); - key_init(); driver_init(); #ifdef CONFIG_SYSCTL sysctl_init(); #endif - /* Networking initialization needs a process context */ - sock_init(); - do_initcalls(); } -static void do_pre_smp_initcalls(void) +static int __initdata nosoftlockup; + +static int __init nosoftlockup_setup(char *str) +{ + nosoftlockup = 1; + return 1; +} +__setup("nosoftlockup", nosoftlockup_setup); + +static void __init do_pre_smp_initcalls(void) { extern int spawn_ksoftirqd(void); #ifdef CONFIG_SMP @@ -676,35 +753,23 @@ static void do_pre_smp_initcalls(void) migration_init(); #endif spawn_ksoftirqd(); + if (!nosoftlockup) + spawn_softlockup_task(); } static void run_init_process(char *init_filename) { argv_init[0] = init_filename; - execve(init_filename, argv_init, envp_init); -} - -static inline void fixup_cpu_present_map(void) -{ -#ifdef CONFIG_SMP - int i; - - /* - * If arch is not hotplug ready and did not populate - * cpu_present_map, just make cpu_present_map same as cpu_possible_map - * for other cpu bringup code to function as normal. e.g smp_init() etc. 
- */ - if (cpus_empty(cpu_present_map)) { - for_each_cpu(i) { - cpu_set(i, cpu_present_map); - } - } -#endif + kernel_execve(init_filename, argv_init, envp_init); } static int init(void * unused) { lock_kernel(); + /* + * init can run on any cpu. + */ + set_cpus_allowed(current, CPU_MASK_ALL); /* * Tell the world that we're going to be the grim * reaper of innocent orphaned children. @@ -713,35 +778,33 @@ static int init(void * unused) * assumptions about where in the task array this * can be found. */ - child_reaper = current; + init_pid_ns.child_reaper = current; + + cad_pid = task_pid(current); - /* Sets up cpus_possible() */ smp_prepare_cpus(max_cpus); do_pre_smp_initcalls(); - fixup_cpu_present_map(); smp_init(); + sched_init_smp(); - /* - * Do this before initcalls, because some drivers want to access - * firmware files. - */ - populate_rootfs(); - do_basic_setup(); - - init_ckrm_sched_res(); + cpuset_init_smp(); - sched_init_smp(); + do_basic_setup(); /* * check if there is an early userspace init. 
If yes, let it do all * the work */ - if (sys_access((const char __user *) "/init", 0) == 0) - execute_command = "/init"; - else + + if (!ramdisk_execute_command) + ramdisk_execute_command = "/init"; + + if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) { + ramdisk_execute_command = NULL; prepare_namespace(); + } /* * Ok, we have completed the initial bootup, and @@ -750,25 +813,33 @@ static int init(void * unused) */ free_initmem(); unlock_kernel(); + mark_rodata_ro(); system_state = SYSTEM_RUNNING; numa_default_policy(); if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - printk("Warning: unable to open an initial console.\n"); + printk(KERN_WARNING "Warning: unable to open an initial console.\n"); (void) sys_dup(0); (void) sys_dup(0); - + + if (ramdisk_execute_command) { + run_init_process(ramdisk_execute_command); + printk(KERN_WARNING "Failed to execute %s\n", + ramdisk_execute_command); + } + /* * We try each of these until one succeeds. * * The Bourne shell can be used instead of init if we are * trying to recover a really broken machine. */ - - if (execute_command) + if (execute_command) { run_init_process(execute_command); - + printk(KERN_WARNING "Failed to execute %s. Attempting " + "defaults...\n", execute_command); + } run_init_process("/sbin/init"); run_init_process("/etc/init"); run_init_process("/bin/init"); @@ -776,16 +847,3 @@ static int init(void * unused) panic("No init found. Try passing init= option to kernel."); } - -static int early_param_test(char *rest) -{ - printk("early_parm_test: %s\n", rest ?: "(null)"); - return rest ? 0 : -EINVAL; -} -early_param("testsetup", early_param_test); -static int early_setup_test(char *rest) -{ - printk("early_setup_test: %s\n", rest ?: "(null)"); - return 0; -} -__setup("testsetup_long", early_setup_test);