#define __KERNEL_SYSCALLS__
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
-#include <linux/devfs_fs_kernel.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/tty.h>
#include <linux/gfp.h>
#include <linux/percpu.h>
+#include <linux/kmod.h>
#include <linux/kernel_stat.h>
#include <linux/security.h>
#include <linux/workqueue.h>
#include <linux/kallsyms.h>
#include <linux/writeback.h>
#include <linux/cpu.h>
+#include <linux/cpuset.h>
#include <linux/efi.h>
+#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
#include <linux/unistd.h>
#include <linux/rmap.h>
#include <linux/mempolicy.h>
+#include <linux/key.h>
+#include <linux/unwind.h>
+#include <linux/buffer_head.h>
+#include <linux/debug_locks.h>
+#include <linux/lockdep.h>
#include <asm/io.h>
#include <asm/bugs.h>
#include <asm/setup.h>
-
-#include <linux/ckrm.h>
-#ifdef CONFIG_CKRM_CPU_SCHEDULE
-int __init init_ckrm_sched_res(void);
-#else
-#define init_ckrm_sched_res() ((void)0)
-#endif
-
-/*
- * This is one of the first .c files built. Error out early
- * if we have compiler trouble..
- */
-#if __GNUC__ == 2 && __GNUC_MINOR__ == 96
-#ifdef CONFIG_FRAME_POINTER
-#error This compiler cannot compile correctly with frame pointers enabled
-#endif
-#endif
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
#endif
/*
- * Versions of gcc older than that listed below may actually compile
- * and link okay, but the end product can have subtle run time bugs.
- * To avoid associated bogus bug reports, we flatly refuse to compile
- * with a gcc that is known to be too old from the very beginning.
+ * This is one of the first .c files built. Error out early if we have compiler
+ * trouble.
+ *
+ * Versions of gcc older than that listed below may actually compile and link
+ * okay, but the end product can have subtle run time bugs. To avoid associated
+ * bogus bug reports, we flatly refuse to compile with a gcc that is known to be
+ * too old from the very beginning.
*/
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95)
+#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2)
#error Sorry, your GCC is too old. It builds incorrect kernels.
#endif
-extern char *linux_banner;
-
static int init(void *);
extern void init_IRQ(void);
-extern void sock_init(void);
extern void fork_init(unsigned long);
extern void mca_init(void);
extern void sbus_init(void);
extern void sysctl_init(void);
extern void signals_init(void);
-extern void buffer_init(void);
extern void pidhash_init(void);
extern void pidmap_init(void);
extern void prio_tree_init(void);
extern void populate_rootfs(void);
extern void driver_init(void);
extern void prepare_namespace(void);
+#ifdef CONFIG_ACPI
+extern void acpi_early_init(void);
+#else
+static inline void acpi_early_init(void) { }
+#endif
+#ifndef CONFIG_DEBUG_RODATA
+static inline void mark_rodata_ro(void) { }
+#endif
#ifdef CONFIG_TC
extern void tc_init(void);
enum system_states system_state;
EXPORT_SYMBOL(system_state);
-/*
- * The kernel_magic value represents the address of _end, which allows
- * namelist tools to "match" each other respectively. That way a tool
- * that looks at /dev/mem can verify that it is using the right System.map
- * file -- if kernel_magic doesn't equal the namelist value of _end,
- * something's wrong.
- */
-extern unsigned long _end;
-unsigned long *kernel_magic = &_end;
-
/*
* Boot command-line arguments
*/
-#define MAX_INIT_ARGS 8
-#define MAX_INIT_ENVS 8
+#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
+#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
extern void time_init(void);
/* Default late time init is NULL. archs can override this later. */
char saved_command_line[COMMAND_LINE_SIZE];
static char *execute_command;
+static char *ramdisk_execute_command;
/* Setup configured maximum number of CPUs to activate */
static unsigned int max_cpus = NR_CPUS;
char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
-__setup("profile=", profile_setup);
+extern struct obs_kernel_param __setup_start[], __setup_end[];
static int __init obsolete_checksetup(char *line)
{
struct obs_kernel_param *p;
- extern struct obs_kernel_param __setup_start, __setup_end;
- p = &__setup_start;
+ p = __setup_start;
do {
int n = strlen(p->str);
if (!strncmp(line, p->str, n)) {
return 1;
}
p++;
- } while (p < &__setup_end);
+ } while (p < __setup_end);
return 0;
}
-/* this should be approx 2 Bo*oMips to start (note initial shift), and will
- still work even if initially too large, it will just take slightly longer */
+/*
+ * This should be approx 2 Bo*oMips to start (note initial shift), and will
+ * still work even if initially too large, it will just take slightly longer
+ */
unsigned long loops_per_jiffy = (1<<12);
EXPORT_SYMBOL(loops_per_jiffy);
-/* This is the number of bits of precision for the loops_per_jiffy. Each
- bit takes on average 1.5/HZ seconds. This (like the original) is a little
- better than 1% */
-#define LPS_PREC 8
-
-void __devinit calibrate_delay(void)
-{
- unsigned long ticks, loopbit;
- int lps_precision = LPS_PREC;
-
- loops_per_jiffy = (1<<12);
-
- printk("Calibrating delay loop... ");
- while ((loops_per_jiffy <<= 1) != 0) {
- /* wait for "start of" clock tick */
- ticks = jiffies;
- while (ticks == jiffies)
- /* nothing */;
- /* Go .. */
- ticks = jiffies;
- __delay(loops_per_jiffy);
- ticks = jiffies - ticks;
- if (ticks)
- break;
- }
-
-/* Do a binary approximation to get loops_per_jiffy set to equal one clock
- (up to lps_precision bits) */
- loops_per_jiffy >>= 1;
- loopbit = loops_per_jiffy;
- while ( lps_precision-- && (loopbit >>= 1) ) {
- loops_per_jiffy |= loopbit;
- ticks = jiffies;
- while (ticks == jiffies);
- ticks = jiffies;
- __delay(loops_per_jiffy);
- if (jiffies != ticks) /* longer than 1 tick */
- loops_per_jiffy &= ~loopbit;
- }
-
-/* Round the value and print it */
- printk("%lu.%02lu BogoMIPS\n",
- loops_per_jiffy/(500000/HZ),
- (loops_per_jiffy/(5000/HZ)) % 100);
-}
-
static int __init debug_kernel(char *str)
{
if (*str)
__setup("debug", debug_kernel);
__setup("quiet", quiet_kernel);
-/* Unknown boot options get handed to init, unless they look like
- failed parameters */
+/*
+ * Handler registered for the "loglevel=" boot option.
+ *
+ * Hands the option text to get_option() together with the address of
+ * console_loglevel, so the parsed value lands directly in that variable.
+ * The return value of get_option() is not checked.
+ *
+ * Always returns 1.
+ */
+static int __init loglevel(char *str)
+{
+ get_option(&str, &console_loglevel);
+ return 1;
+}
+
+__setup("loglevel=", loglevel);
+
+/*
+ * Unknown boot options get handed to init, unless they look like
+ * failed parameters
+ */
static int __init unknown_bootoption(char *param, char *val)
{
/* Change NUL term back to "=", to make "param" the whole string. */
- if (val)
- val[-1] = '=';
+ if (val) {
+ /* param=val or param="val"? */
+ if (val == param+strlen(param)+1)
+ val[-1] = '=';
+ else if (val == param+strlen(param)+2) {
+ val[-2] = '=';
+ memmove(val-1, val, strlen(val)+1);
+ val--;
+ } else
+ BUG();
+ }
/* Handle obsolete-style parameters */
if (obsolete_checksetup(param))
return 0;
- /* Preemptive maintenance for "why didn't my mispelled command
- line work?" */
+ /*
+ * Preemptive maintenance for "why didn't my misspelled command
+ * line work?"
+ */
if (strchr(param, '.') && (!val || strchr(param, '.') < val)) {
printk(KERN_ERR "Unknown boot option `%s': ignoring\n", param);
return 0;
unsigned int i;
execute_command = str;
- /* In case LILO is going to boot us with default command line,
+ /*
+ * In case LILO is going to boot us with default command line,
* it prepends "auto" before the whole cmdline which makes
* the shell think it should execute a script with such name.
* So we ignore all arguments entered _before_ init=... [MJ]
}
__setup("init=", init_setup);
-extern void setup_arch(char **);
-extern void cpu_idle(void);
+/*
+ * Handler registered for the "rdinit=" boot option.
+ *
+ * Records the option text as ramdisk_execute_command and resets
+ * argv_init[1] .. argv_init[MAX_INIT_ARGS - 1] to NULL; argv_init[0] is
+ * left untouched.
+ *
+ * Always returns 1.
+ */
+static int __init rdinit_setup(char *str)
+{
+ unsigned int i;
+
+ ramdisk_execute_command = str;
+ /* See "auto" comment in init_setup */
+ for (i = 1; i < MAX_INIT_ARGS; i++)
+ argv_init[i] = NULL;
+ return 1;
+}
+__setup("rdinit=", rdinit_setup);
#ifndef CONFIG_SMP
#else
#ifdef __GENERIC_PER_CPU
-unsigned long __per_cpu_offset[NR_CPUS];
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
{
unsigned long size, i;
char *ptr;
- /* Created by linker magic */
- extern char __per_cpu_start[], __per_cpu_end[];
+ unsigned long nr_possible_cpus = num_possible_cpus();
/* Copy section for each CPU (we discard the original) */
size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
if (size < PERCPU_ENOUGH_ROOM)
size = PERCPU_ENOUGH_ROOM;
#endif
+ ptr = alloc_bootmem(size * nr_possible_cpus);
- ptr = alloc_bootmem(size * NR_CPUS);
-
- for (i = 0; i < NR_CPUS; i++, ptr += size) {
+ for_each_possible_cpu(i) {
__per_cpu_offset[i] = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ ptr += size;
}
}
#endif /* !__GENERIC_PER_CPU */
+#include <linux/ext3_fs_i.h>
+#include <linux/skbuff.h>
+#include <linux/sched.h>
+
/* Called by boot processor to activate the rest. */
static void __init smp_init(void)
{
}
/* Any cleanup work */
- printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+ printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
smp_cpus_done(max_cpus);
#if 0
/* Get other processors into their bootup holding patterns. */
- smp_threads_ready=1;
smp_commence();
#endif
+
+ printk(KERN_DEBUG "sizeof(vma)=%u bytes\n", (unsigned int) sizeof(struct vm_area_struct));
+ printk(KERN_DEBUG "sizeof(page)=%u bytes\n", (unsigned int) sizeof(struct page));
+ printk(KERN_DEBUG "sizeof(inode)=%u bytes\n", (unsigned int) sizeof(struct inode));
+ printk(KERN_DEBUG "sizeof(dentry)=%u bytes\n", (unsigned int) sizeof(struct dentry));
+ printk(KERN_DEBUG "sizeof(ext3inode)=%u bytes\n", (unsigned int) sizeof(struct ext3_inode_info));
+ printk(KERN_DEBUG "sizeof(buffer_head)=%u bytes\n", (unsigned int) sizeof(struct buffer_head));
+ printk(KERN_DEBUG "sizeof(skbuff)=%u bytes\n", (unsigned int) sizeof(struct sk_buff));
+ printk(KERN_DEBUG "sizeof(task_struct)=%u bytes\n", (unsigned int) sizeof(struct task_struct));
}
#endif
*/
static void noinline rest_init(void)
+ __releases(kernel_lock)
{
kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
numa_default_policy();
- system_state = SYSTEM_BOOTING_SCHEDULER_OK;
unlock_kernel();
- cpu_idle();
+
+ /*
+ * The boot idle thread must execute schedule()
+ * at least once to get things moving:
+ */
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+
+ /* Call into cpu_idle with preempt disabled */
+ cpu_idle();
}
/* Check for early params. */
static int __init do_early_param(char *param, char *val)
{
struct obs_kernel_param *p;
- extern struct obs_kernel_param __setup_start, __setup_end;
- for (p = &__setup_start; p < &__setup_end; p++) {
+ for (p = __setup_start; p < __setup_end; p++) {
if (p->early && strcmp(param, p->str) == 0) {
if (p->setup_func(val) != 0)
printk(KERN_WARNING
* Activate the first processor.
*/
+static void __init boot_cpu_init(void)
+{
+ int cpu = smp_processor_id();
+ /*
+ * Mark the boot cpu "online", "present" and "possible" by setting
+ * its bit in each of the three cpu maps, for SMP and UP case.
+ */
+ cpu_set(cpu, cpu_online_map);
+ cpu_set(cpu, cpu_present_map);
+ cpu_set(cpu, cpu_possible_map);
+}
+
+/*
+ * Default, empty implementation. It is declared weak, so a non-weak
+ * definition of smp_setup_processor_id() elsewhere takes precedence at
+ * link time. start_kernel() calls this before unwind_init() and
+ * lockdep_init().
+ */
+void __init __attribute__((weak)) smp_setup_processor_id(void)
+{
+}
+
asmlinkage void __init start_kernel(void)
{
char * command_line;
extern struct kernel_param __start___param[], __stop___param[];
+
+ smp_setup_processor_id();
+
+ /*
+ * Need to run as early as possible, to initialize the
+ * lockdep hash:
+ */
+ unwind_init();
+ lockdep_init();
+
+ local_irq_disable();
+ early_boot_irqs_off();
+ early_init_irq_lock_class();
+
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
lock_kernel();
+ boot_cpu_init();
page_address_init();
+ printk(KERN_NOTICE);
printk(linux_banner);
setup_arch(&command_line);
setup_per_cpu_areas();
+ smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
- /*
- * Mark the boot cpu "online" so that it can call console drivers in
- * printk() and can access its per-cpu storage.
- */
- smp_prepare_boot_cpu();
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
-
+ /*
+ * Disable preemption - early bootup scheduling is extremely
+ * fragile until we cpu_idle() for the first time.
+ */
+ preempt_disable();
build_all_zonelists();
page_alloc_init();
- printk("Kernel command line: %s\n", saved_command_line);
+ printk(KERN_NOTICE "Kernel command line: %s\n", saved_command_line);
parse_early_param();
parse_args("Booting kernel", command_line, __start___param,
__stop___param - __start___param,
rcu_init();
init_IRQ();
pidhash_init();
- /* MEF: In 2.6.5. ckrm_init was right after pidhash_init() but
- before sched_init(). Will leave it after pidhash_init()
- and cross finger.
- */
- ckrm_init();
init_timers();
+ hrtimers_init();
softirq_init();
+ timekeeping_init();
time_init();
+ profile_init();
+ if (!irqs_disabled())
+ printk("start_kernel(): bug: interrupts were enabled early\n");
+ early_boot_irqs_on();
+ local_irq_enable();
/*
* HACK ALERT! This is early. We're enabling the console before
console_init();
if (panic_later)
panic(panic_later, panic_param);
- profile_init();
- local_irq_enable();
+
+ lockdep_info();
+
+ /*
+ * Need to run this when irqs are enabled, because it wants
+ * to self-test [hard/soft]-irqs on/off lock inversion bugs
+ * too:
+ */
+ locking_selftest();
+
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
initrd_start < min_low_pfn << PAGE_SHIFT) {
}
#endif
vfs_caches_init_early();
+ cpuset_init_early();
mem_init();
kmem_cache_init();
+ setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
late_time_init();
proc_caches_init();
buffer_init();
unnamed_dev_init();
- security_scaffolding_startup();
+ key_init();
+ security_init();
vfs_caches_init(num_physpages);
radix_tree_init();
signals_init();
#ifdef CONFIG_PROC_FS
proc_root_init();
#endif
+ cpuset_init();
+ taskstats_init_early();
+ delayacct_init();
check_bugs();
- /*
- * We count on the initial thread going ok
- * Like idlers init is an unlocked kernel thread, which will
- * make syscalls (and thus be locked).
- */
- init_idle(current, smp_processor_id());
+ acpi_early_init(); /* before LAPIC and SMP init */
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
__setup("initcall_debug", initcall_debug_setup);
+#ifdef CONFIG_BOOT_DELAY
+
+unsigned int boot_delay = 0; /* msecs delay after each printk during bootup */
+extern long preset_lpj; /* 0 is treated as "not set" by boot_delay_setup() */
+unsigned long long printk_delay_msec = 0; /* loops per msec, computed in boot_delay_setup() from lpj and CONFIG_HZ */
+
+/*
+ * Handler registered for the "boot_delay=" boot option.
+ *
+ * Hands the option text to get_option() together with the address of
+ * boot_delay; a resulting value above 10 * 1000 is reset to 0.
+ * Independently of that value, stores a loops-per-millisecond figure in
+ * printk_delay_msec and prints all inputs and results with printk().
+ *
+ * Always returns 1.
+ */
+static int __init boot_delay_setup(char *str)
+{
+ /* Use preset_lpj when it is non-zero, otherwise fall back to a fixed guess. */
+ unsigned long lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */
+ /*
+ * NOTE(review): lpj / 1000 is an integer division done before the
+ * multiply, so the remainder is dropped and an lpj below 1000
+ * yields 0 here.
+ */
+ unsigned long long loops_per_msec = lpj / 1000 * CONFIG_HZ;
+
+ get_option(&str, &boot_delay);
+ if (boot_delay > 10 * 1000)
+ boot_delay = 0;
+
+ printk_delay_msec = loops_per_msec;
+ printk("boot_delay: %u, preset_lpj: %ld, lpj: %lu, CONFIG_HZ: %d, printk_delay_msec: %llu\n",
+ boot_delay, preset_lpj, lpj, CONFIG_HZ, printk_delay_msec);
+
+ return 1;
+}
+__setup("boot_delay=", boot_delay_setup);
+
+#endif
+
struct task_struct *child_reaper = &init_task;
-extern initcall_t __initcall_start, __initcall_end;
+extern initcall_t __initcall_start[], __initcall_end[];
static void __init do_initcalls(void)
{
initcall_t *call;
int count = preempt_count();
- for (call = &__initcall_start; call < &__initcall_end; call++) {
- char *msg;
+ for (call = __initcall_start; call < __initcall_end; call++) {
+ char *msg = NULL;
+ char msgbuf[40];
+ int result;
if (initcall_debug) {
- printk(KERN_DEBUG "Calling initcall 0x%p", *call);
- print_symbol(": %s()", (unsigned long) *call);
+ printk("Calling initcall 0x%p", *call);
+ print_fn_descriptor_symbol(": %s()",
+ (unsigned long) *call);
printk("\n");
}
- (*call)();
+ result = (*call)();
- msg = NULL;
+ if (result && result != -ENODEV && initcall_debug) {
+ sprintf(msgbuf, "error code %d", result);
+ msg = msgbuf;
+ }
if (preempt_count() != count) {
msg = "preemption imbalance";
preempt_count() = count;
local_irq_enable();
}
if (msg) {
- printk("error in initcall at 0x%p: "
- "returned with %s\n", *call, msg);
+ printk(KERN_WARNING "initcall at 0x%p", *call);
+ print_fn_descriptor_symbol(": %s()",
+ (unsigned long) *call);
+ printk(": returned with %s\n", msg);
}
}
*/
static void __init do_basic_setup(void)
{
+ /*
+ * drivers will send hotplug events, so init_workqueues() and
+ * usermodehelper_init() are called before driver_init()
+ */
+ init_workqueues();
+ usermodehelper_init();
driver_init();
#ifdef CONFIG_SYSCTL
sysctl_init();
#endif
- /* Networking initialization needs a process context */
- sock_init();
-
- init_workqueues();
do_initcalls();
}
+/* Set to 1 by the "nosoftlockup" boot option; tested in do_pre_smp_initcalls(). */
+static int __initdata nosoftlockup;
+
+/*
+ * Handler registered for the "nosoftlockup" boot option.
+ *
+ * Sets the nosoftlockup flag, which makes do_pre_smp_initcalls() skip
+ * spawn_softlockup_task(). The option text in str is not examined.
+ *
+ * Always returns 1.
+ */
+static int __init nosoftlockup_setup(char *str)
+{
+ nosoftlockup = 1;
+ return 1;
+}
+__setup("nosoftlockup", nosoftlockup_setup);
+
static void do_pre_smp_initcalls(void)
{
extern int spawn_ksoftirqd(void);
migration_init();
#endif
spawn_ksoftirqd();
+ if (!nosoftlockup)
+ spawn_softlockup_task();
}
static void run_init_process(char *init_filename)
execve(init_filename, argv_init, envp_init);
}
-static inline void fixup_cpu_present_map(void)
-{
-#ifdef CONFIG_SMP
- int i;
-
- /*
- * If arch is not hotplug ready and did not populate
- * cpu_present_map, just make cpu_present_map same as cpu_possible_map
- * for other cpu bringup code to function as normal. e.g smp_init() etc.
- */
- if (cpus_empty(cpu_present_map)) {
- for_each_cpu(i) {
- cpu_set(i, cpu_present_map);
- }
- }
-#endif
-}
-
static int init(void * unused)
{
lock_kernel();
+ /*
+ * init can run on any cpu.
+ */
+ set_cpus_allowed(current, CPU_MASK_ALL);
/*
* Tell the world that we're going to be the grim
* reaper of innocent orphaned children.
*/
child_reaper = current;
- /* Sets up cpus_possible() */
smp_prepare_cpus(max_cpus);
do_pre_smp_initcalls();
- fixup_cpu_present_map();
smp_init();
+ sched_init_smp();
+
+ cpuset_init_smp();
/*
* Do this before initcalls, because some drivers want to access
* firmware files.
*/
populate_rootfs();
- do_basic_setup();
- init_ckrm_sched_res();
- sched_init_smp();
+ do_basic_setup();
/*
* check if there is an early userspace init. If yes, let it do all
* the work
*/
- if (sys_access((const char __user *) "/init", 0) == 0)
- execute_command = "/init";
- else
+
+ if (!ramdisk_execute_command)
+ ramdisk_execute_command = "/init";
+
+ if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
+ ramdisk_execute_command = NULL;
prepare_namespace();
+ }
/*
* Ok, we have completed the initial bootup, and
*/
free_initmem();
unlock_kernel();
+ mark_rodata_ro();
system_state = SYSTEM_RUNNING;
numa_default_policy();
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- printk("Warning: unable to open an initial console.\n");
+ printk(KERN_WARNING "Warning: unable to open an initial console.\n");
(void) sys_dup(0);
(void) sys_dup(0);
-
+
+ if (ramdisk_execute_command) {
+ run_init_process(ramdisk_execute_command);
+ printk(KERN_WARNING "Failed to execute %s\n",
+ ramdisk_execute_command);
+ }
+
/*
* We try each of these until one succeeds.
*
* The Bourne shell can be used instead of init if we are
* trying to recover a really broken machine.
*/
-
- if (execute_command)
+ if (execute_command) {
run_init_process(execute_command);
-
+ printk(KERN_WARNING "Failed to execute %s. Attempting "
+ "defaults...\n", execute_command);
+ }
run_init_process("/sbin/init");
run_init_process("/etc/init");
run_init_process("/bin/init");
panic("No init found. Try passing init= option to kernel.");
}
-
-static int early_param_test(char *rest)
-{
- printk("early_parm_test: %s\n", rest ?: "(null)");
- return rest ? 0 : -EINVAL;
-}
-early_param("testsetup", early_param_test);
-static int early_setup_test(char *rest)
-{
- printk("early_setup_test: %s\n", rest ?: "(null)");
- return 0;
-}
-__setup("testsetup_long", early_setup_test);