'before-ckrm_E16-mem-controller-O1-merge'.
--- /dev/null
+.config
+.tmp_System.map
+.tmp_kallsyms1.S
+.tmp_kallsyms2.S
+.tmp_kallsyms3.S
+.tmp_versions
+.tmp_vmlinux1
+.tmp_vmlinux2
+.tmp_vmlinux3
+.version
+Module.symvers
+System.map
+vmlinux
W: http://www.cse.unsw.edu.au/~neilb/patches/linux-devel/
S: Maintained
+KEXEC
+P: Eric Biederman
+P: Randy Dunlap
+M: ebiederm@xmission.com
+M: rddunlap@osdl.org
+W: http://www.xmission.com/~ebiederm/files/kexec/
+W: http://developer.osdl.org/rddunlap/kexec/
+L: linux-kernel@vger.kernel.org
+L: fastboot@osdl.org
+S: Maintained
+
LANMEDIA WAN CARD DRIVER
P: Andrew Stanley-Jones
M: asj@lanmedia.com
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 8
-EXTRAVERSION = -1.521.2.5.planetlab
+EXTRAVERSION = -1.521.3.planetlab.2004.12.14
NAME=Zonked Quokka
# *DOCUMENTATION*
CFLAGS += -fomit-frame-pointer
endif
+ifdef CONFIG_X86_STACK_CHECK
+CFLAGS += -p
+endif
+
ifdef CONFIG_DEBUG_INFO
CFLAGS += -g
endif
generate incorrect output with certain kernel constructs when
-mregparm=3 is used.
+config IRQSTACKS
+ bool "Use separate IRQ stacks"
+ help
+ If you say Y here the kernel will use a separate IRQ stack on each
+ cpu to handle interrupts.
+
+config STACK_SIZE_SHIFT
+ int "Kernel stack size (12 => 4KB, 13 => 8KB, 14 => 16KB)"
+ range 12 14
+ default 12 if IRQSTACKS
+ default 13
+ help
+ Select kernel stack size. 4KB stacks are best as they let
+ the system scale further. Use 8KB stacks if you have an
+	  experimental kernel where a stack overflow with a 4KB stack
+ might occur. Use 16KB stacks if you want to safely support
+ Windows device drivers using either Linuxant or ndiswrapper.
+
+config STACK_WARN
+ int "Print stack trace when stack grows beyond specified bytes"
+ default 4096 if IRQSTACKS
+ default 4096
+ help
+	  The kernel will print a stack trace when current stack usage
+	  exceeds the specified size.
+
+config X86_STACK_CHECK
+ bool "Check for stack overflows"
+ default n
+ help
+ Say Y here to have the kernel attempt to detect when the per-task
+ kernel stack overflows.
+
+ Some older versions of gcc don't handle the -p option correctly.
+ Kernprof is affected by the same problem, which is described here:
+ http://oss.sgi.com/projects/kernprof/faq.html#Q9
+
+	  Basically, if you get oopses in __free_pages_ok during boot when
+	  you have this turned on, you need to fix gcc. The Red Hat 2.96
+	  version and gcc 3.x seem to work.
+
+	  If not debugging a stack overflow problem, say N.
+
+config STACK_PANIC
+ int "Panic when stack approaches with specified bytes of the stack limit"
+ depends on X86_STACK_CHECK
+ default 512 if IRQSTACKS
+ default 512
+ help
+	  Panic if the stack grows to within the specified number of bytes
+	  of the stack limit.
+
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+	  kexec is a system call that implements the ability to shut down your
+	  current kernel, and to start another kernel. It is like a reboot
+	  but it is independent of the system firmware. And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shut down, so do not be surprised if this code does not
+	  initially work for you. It may help to enable device hotplugging
+	  support. As of this writing the exact hardware interface is
+	  strongly in flux, so no good recommendation can be made.
+
endmenu
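For context: the interface gated by CONFIG_KEXEC above is a single system call, wired into the syscall table as entry 283 in a hunk below. A minimal userspace sketch of loading a one-segment image (assuming the kexec_load(2) prototype this patch introduces; the image buffer, load address and alignment here are illustrative, not part of the patch):

#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Mirrors struct kexec_segment from the kernel's kexec interface. */
struct kexec_segment {
	void *buf;     /* source buffer in this process */
	size_t bufsz;
	void *mem;     /* physical destination address */
	size_t memsz;  /* page-aligned size reserved at 'mem' */
};

/* Load a single-segment image at 1 MB with the entry point at its
 * start; flags 0 == KEXEC_ARCH_DEFAULT.  A later reboot(2) with the
 * kexec command (or `kexec -e` from the userspace tools) starts it. */
static long load_image(void *image, size_t len)
{
	struct kexec_segment seg = {
		.buf = image, .bufsz = len,
		.mem = (void *)0x100000,
		.memsz = (len + 4095) & ~4095UL,
	};
	return syscall(__NR_kexec_load, 0x100000UL, 1UL, &seg, 0UL);
}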
--- /dev/null
+bootsect
+bzImage
+setup
+vmlinux.bin
--- /dev/null
+vmlinux
+vmlinux.bin
+vmlinux.bin.gz
if (high_loaded) close_output_buffer_if_we_run_high(mv);
return high_loaded;
}
+
+/* We don't actually check for stack overflows this early. */
+__asm__(".globl mcount ; mcount: ret\n");
CONFIG_EARLY_PRINTK=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
# CONFIG_FRAME_POINTER is not set
-CONFIG_4KSTACKS=y
+# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
--- /dev/null
+asm-offsets.s
+vmlinux.lds.s
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
obj-$(CONFIG_MODULES) += module.o
outb(0x70, 0x22);
outb(0x00, 0x23);
}
+ else {
+ /* Go back to Virtual Wire compatibility mode */
+ unsigned long value;
+
+ /* For the spurious interrupt use vector F, and enable it */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+ value |= 0xf;
+ apic_write_around(APIC_SPIV, value);
+
+ /* For LVT0 make it edge triggered, active high, external and enabled */
+ value = apic_read(APIC_LVT0);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT);
+ apic_write_around(APIC_LVT0, value);
+
+ /* For LVT1 make it edge triggered, active high, nmi and enabled */
+ value = apic_read(APIC_LVT1);
+ value &= ~(
+ APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+ apic_write_around(APIC_LVT1, value);
+ }
}
void disable_local_APIC(void)
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/personality.h>
+#include <linux/thread_info.h>
#include <asm/ucontext.h>
#include "sigframe.h"
#include <asm/fixmap.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
.long sys_mq_timedreceive /* 280 */
.long sys_mq_notify
.long sys_mq_getsetattr
- .long sys_ni_syscall /* reserved for kexec */
+ .long sys_kexec_load
.long sys_ioprio_set
.long sys_ioprio_get /* 285 */
syscall_table_size=(.-sys_call_table)
+
+#ifdef CONFIG_X86_STACK_CHECK
+.data
+.globl stack_overflowed
+stack_overflowed:
+ .long 0
+.text
+
+ENTRY(mcount)
+#warning stack check enabled
+ push %eax
+ movl $(THREAD_SIZE - 1),%eax
+ andl %esp,%eax
+ cmpl $STACK_WARN,%eax
+ jle 1f
+2:
+ popl %eax
+ ret
+1:
+ /* prevent infinite recursion from call to mcount from the
+ * stack_overflow function. Need to revisit this code for
+ * SMP based systems.
+ */
+ lock; btsl $0,stack_overflowed
+ jc 2b
+
+ /* prepare to jmp to stack_overflow directly, as if it were
+ * called directly by the caller of mcount.
+ */
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+ call stack_overflow
+ /* Note that stack_overflow() will clear the stack_overflowed
+ * variable.
+ */
+
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+
+ popl %eax
+ ret
+#endif
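In C terms, the fast path of ENTRY(mcount) above masks %esp with THREAD_SIZE-1 to get the offset from the bottom of the current kernel stack and takes the slow path when little room remains; a sketch of the equivalent logic (current_esp() is a hypothetical helper; the real check must stay in assembly so it does not itself consume stack):

unsigned long esp = current_esp();              /* hypothetical */
unsigned long room = esp & (THREAD_SIZE - 1);   /* bytes still free */

/* Kernel stacks grow down, so a small masked value means the stack
 * pointer is near the bottom of the THREAD_SIZE-aligned area. */
if (room <= STACK_WARN)
	stack_overflow();       /* gated once via stack_overflowed */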
EXPORT_SYMBOL(__PAGE_KERNEL);
+#ifdef CONFIG_X86_STACK_CHECK
+extern void mcount(void);
+EXPORT_SYMBOL(mcount);
+#endif
+
+
#ifdef CONFIG_HIGHMEM
EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
return 0;
}
+static int i8259A_shutdown(struct sys_device *dev)
+{
+ /* Put the i8259A into a quiescent state that
+ * the kernel initialization code can get it
+ * out of.
+ */
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+	outb(0xff, 0xA1);	/* mask all of 8259A-2 */
+ return 0;
+}
+
static struct sysdev_class i8259_sysdev_class = {
set_kset_name("i8259"),
.resume = i8259A_resume,
+ .shutdown = i8259A_shutdown,
};
static struct sys_device device_i8259A = {
__attribute__((__section__(".data.init_task"))) =
{ INIT_THREAD_INFO(init_task, init_thread_union) };
+#ifdef CONFIG_X86_STACK_CHECK
+union thread_union stack_overflow_stack
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task, stack_overflow_stack) };
+#endif
+
+
/*
* Initial task structure.
*
*/
void disable_IO_APIC(void)
{
+ int pin;
/*
* Clear the IO-APIC before rebooting:
*/
clear_IO_APIC();
+	/*
+	 * If the i8259 is routed through an IOAPIC,
+	 * put that IOAPIC in virtual wire mode
+	 * so legacy interrupts can be delivered.
+	 */
+ pin = find_isa_irq_pin(0, mp_ExtINT);
+ if (pin != -1) {
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+ entry.trigger = 0; /* Edge */
+ entry.irr = 0;
+ entry.polarity = 0; /* High */
+ entry.delivery_status = 0;
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = 7; /* ExtInt */
+ entry.vector = 0;
+ entry.dest.physical.physical_dest = 0;
+
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
disconnect_bsp_APIC();
}
/*
* per-CPU IRQ handling stacks
*/
+#ifdef CONFIG_IRQSTACKS
union irq_ctx *hardirq_ctx[NR_CPUS];
union irq_ctx *softirq_ctx[NR_CPUS];
+#endif
/*
* Special irq handlers.
int status = 1; /* Force the "do bottom halves" bit */
int retval = 0;
+ if (!(action->flags & SA_INTERRUPT))
+ local_irq_enable();
+
do {
status |= action->flags;
retval |= action->handler(irq, action->dev_id, regs);
u32 *isp;
union irq_ctx * curctx;
union irq_ctx * irqctx;
-
+#ifdef CONFIG_IRQSTACKS
curctx = (union irq_ctx *) current_thread_info();
irqctx = hardirq_ctx[smp_processor_id()];
-
+#else
+ curctx = irqctx = (union irq_ctx *)0;
+#endif
spin_unlock(&desc->lock);
/*
break;
desc->status &= ~IRQ_PENDING;
}
-
desc->status &= ~IRQ_INPROGRESS;
out:
}
+#ifdef CONFIG_IRQSTACKS
/*
* These should really be __section__(".bss.page_aligned") as well, but
* gcc's 3.0 and earlier don't handle that correctly.
}
EXPORT_SYMBOL(do_softirq);
+#endif
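For orientation: the hardirq_ctx/softirq_ctx arrays guarded by CONFIG_IRQSTACKS above hold per-CPU stack contexts of this shape in the 4K-stacks design (a sketch consistent with the declarations in this hunk, not an addition to the diff):

/* One per-CPU IRQ stack: thread_info at the base, stack above it. */
union irq_ctx {
	struct thread_info tinfo;
	u32 stack[THREAD_SIZE / sizeof(u32)];
};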
--- /dev/null
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/cpufeature.h>
+
+static inline unsigned long read_cr3(void)
+{
+ unsigned long cr3;
+ asm volatile("movl %%cr3,%0": "=r"(cr3));
+ return cr3;
+}
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#define LEVEL0_SIZE (1UL << 12UL)
+
+#ifndef CONFIG_X86_PAE
+#define LEVEL1_SIZE (1UL << 22UL)
+static u32 pgtable_level1[1024] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index;
+ u32 *pgtable_level2;
+
+ /* Find the current page table */
+ pgtable_level2 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = address / LEVEL1_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level2);
+}
+
+#else
+#define LEVEL1_SIZE (1UL << 21UL)
+#define LEVEL2_SIZE (1UL << 30UL)
+static u64 pgtable_level1[512] PAGE_ALIGNED;
+static u64 pgtable_level2[512] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index, level3_index;
+ u64 *pgtable_level3;
+
+ /* Find the current page table */
+ pgtable_level3 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+ level3_index = address / LEVEL2_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+ set_64bit(&pgtable_level3[level3_index], __pa(pgtable_level2) | L2_ATTR);
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level3);
+}
+#endif
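+
+/* Worked example of the index arithmetic (non-PAE, hypothetical address):
+ *	address = 0x00801000 (8 MB + 4 KB)
+ *	level2_index = 0x00801000 / LEVEL1_SIZE (4 MB)          = 2
+ *	level1_index = (0x00801000 % 4 MB) / LEVEL0_SIZE (4 KB) = 1
+ * so pgtable_level2[2] points at pgtable_level1, and pgtable_level1[1]
+ * maps the page onto itself.
+ */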
+
+
+static void set_idt(void *newidt, __u16 limit)
+{
+ unsigned char curidt[6];
+
+	/* ia32 supports unaligned loads & stores */
+ (*(__u16 *)(curidt)) = limit;
+ (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
+
+ __asm__ __volatile__ (
+ "lidt %0\n"
+ : "=m" (curidt)
+ );
+};
+
+
+static void set_gdt(void *newgdt, __u16 limit)
+{
+ unsigned char curgdt[6];
+
+ /* ia32 supports unaligned loads & stores */
+ (*(__u16 *)(curgdt)) = limit;
+ (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
+
+ __asm__ __volatile__ (
+ "lgdt %0\n"
+ : "=m" (curgdt)
+ );
+};
+
+static void load_segments(void)
+{
+#define __STR(X) #X
+#define STR(X) __STR(X)
+
+ __asm__ __volatile__ (
+ "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+ "\t1:\n"
+ "\tmovl $"STR(__KERNEL_DS)",%eax\n"
+ "\tmovl %eax,%ds\n"
+ "\tmovl %eax,%es\n"
+ "\tmovl %eax,%fs\n"
+ "\tmovl %eax,%gs\n"
+ "\tmovl %eax,%ss\n"
+ );
+#undef STR
+#undef __STR
+}
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+ unsigned long indirection_page, unsigned long reboot_code_buffer,
+ unsigned long start_address, unsigned int has_pae);
+
+const extern unsigned char relocate_new_kernel[];
+extern void relocate_new_kernel_end(void);
+const extern unsigned int relocate_new_kernel_size;
+
+/*
+ * Do whatever setup is needed on the image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later. Currently nothing is required.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+ unsigned long indirection_page;
+ unsigned long reboot_code_buffer;
+ relocate_new_kernel_t rnk;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* Compute some offsets */
+ reboot_code_buffer = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+ indirection_page = image->head & PAGE_MASK;
+
+ /* Set up an identity mapping for the reboot_code_buffer */
+ identity_map_page(reboot_code_buffer);
+
+ /* copy it out */
+ memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size);
+
+	/* The segment registers are funny things: they are
+	 * automatically loaded from a table in memory when you
+	 * set them to a specific selector, but this table is never
+	 * accessed again unless you set the segment to a different selector.
+	 *
+	 * The more common model is a cache where the behind-the-scenes
+	 * work is done, but the cache may also be dropped at arbitrary
+	 * times.
+	 *
+	 * I take advantage of this here by force loading the
+	 * segments, before I zap the gdt with an invalid value.
+	 */
+ load_segments();
+ /* The gdt & idt are now invalid.
+ * If you want to load them you must set up your own idt & gdt.
+ */
+ set_gdt(phys_to_virt(0),0);
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+ rnk = (relocate_new_kernel_t) reboot_code_buffer;
+ (*rnk)(indirection_page, reboot_code_buffer, image->start, cpu_has_pae);
+}
__setup("idle=", idle_setup);
+void stack_overflow(void)
+{
+ extern unsigned long stack_overflowed;
+ unsigned long esp = current_stack_pointer();
+	int panicking = ((esp&(THREAD_SIZE-1)) <= STACK_PANIC);
+
+	oops_in_progress = 1;
+	printk("esp: 0x%lx masked: 0x%lx STACK_PANIC: 0x%lx %d %d\n",
+		esp, (esp&(THREAD_SIZE-1)), STACK_PANIC,
+		(((esp&(THREAD_SIZE-1)) <= STACK_PANIC)), panicking);
+	show_trace(current, (void *)esp);
+
+	if (panicking)
+		panic("stack overflow\n");
+
+	oops_in_progress = 0;
+
+	/* Just let it happen once per task, as otherwise it goes nuts
+	 * in printing stack traces. This means that I need to dump
+	 * the stack_overflowed boolean into the task or thread_info
+	 * structure. For now just turn it off altogether.
+	 */
+
+ /* stack_overflowed = 0; */
+}
+
void show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
int reboot_thru_bios;
#ifdef CONFIG_SMP
-int reboot_smp = 0;
static int reboot_cpu = -1;
/* shamelessly grabbed from lib/vsprintf.c for readability */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
return 0;
}
-/*
- * Some machines require the "reboot=s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_reboot(struct dmi_system_id *d)
-{
-#ifdef CONFIG_SMP
- if (!reboot_smp) {
- reboot_smp = 1;
- printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
- }
-#endif
- return 0;
-}
-
-/*
- * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_bios_reboot(struct dmi_system_id *d)
-{
- set_smp_reboot(d);
- set_bios_reboot(d);
- return 0;
-}
-
static struct dmi_system_id __initdata reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Dell 1300's */
- .callback = set_smp_bios_reboot,
+ .callback = set_bios_reboot,
.ident = "Dell PowerEdge 1300",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
: "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
}
-void machine_restart(char * __unused)
+void machine_shutdown(void)
{
#ifdef CONFIG_SMP
- int cpuid;
-
- cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
- if (reboot_smp) {
-
- /* check to see if reboot_cpu is valid
- if its not, default to the BSP */
- if ((reboot_cpu == -1) ||
- (reboot_cpu > (NR_CPUS -1)) ||
- !physid_isset(cpuid, phys_cpu_present_map))
- reboot_cpu = boot_cpu_physical_apicid;
-
- reboot_smp = 0; /* use this as a flag to only go through this once*/
- /* re-run this function on the other CPUs
- it will fall though this section since we have
- cleared reboot_smp, and do the reboot if it is the
- correct CPU, otherwise it halts. */
- if (reboot_cpu != cpuid)
- smp_call_function((void *)machine_restart , NULL, 1, 0);
+ int reboot_cpu_id;
+
+ /* The boot cpu is always logical cpu 0 */
+ reboot_cpu_id = 0;
+
+	/* See if a command line override was given */
+	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
+ cpu_isset(reboot_cpu, cpu_online_map)) {
+ reboot_cpu_id = reboot_cpu;
}
- /* if reboot_cpu is still -1, then we want a tradional reboot,
- and if we are not running on the reboot_cpu,, halt */
- if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
- for (;;)
- __asm__ __volatile__ ("hlt");
+ /* Make certain the cpu I'm rebooting on is online */
+ if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+ reboot_cpu_id = smp_processor_id();
}
- /*
- * Stop all CPUs and turn off local APICs and the IO-APIC, so
- * other OSs see a clean IRQ state.
+
+ /* Make certain I only run on the appropriate processor */
+ set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
+
+ /* O.K. Now that I'm on the appropriate processor, stop
+ * all of the others, and disable their local APICs.
*/
+
if (!netdump_mode)
smp_send_stop();
#elif defined(CONFIG_X86_LOCAL_APIC)
#ifdef CONFIG_X86_IO_APIC
disable_IO_APIC();
#endif
+}
+
+void machine_restart(char * __unused)
+{
+ machine_shutdown();
if (!reboot_thru_bios) {
if (efi_enabled) {
--- /dev/null
+/*
+ * relocate_kernel.S - put the kernel image in place to boot
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/linkage.h>
+
+	/*
+	 * Must be relocatable PIC code callable as a C function; once
+	 * it starts it cannot use the previous process's stack.
+	 */
+ .globl relocate_new_kernel
+relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* indirection_page */
+ movl 8(%esp), %ebp /* reboot_code_buffer */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+ /* zero out flags, and disable interrupts */
+ pushl $0
+ popfl
+
+ /* set a new stack at the bottom of our page... */
+ lea 4096(%ebp), %esp
+
+ /* store the parameters back on the stack */
+ pushl %edx /* store the start address */
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+ * 18 0 == Alignment check disabled
+ * 16 0 == Write protect disabled
+ * 3 0 == No task switch
+ * 2 0 == Don't do FP software emulation.
+	 * 0  1 == Protected mode enabled
+ */
+ movl %cr0, %eax
+ andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
+ orl $(1<<0), %eax
+ movl %eax, %cr0
+
+ /* clear cr4 if applicable */
+ testl %ecx, %ecx
+ jz 1f
+ /* Set cr4 to a known state:
+ * Setting everything to zero seems safe.
+ */
+ movl %cr4, %eax
+ andl $0, %eax
+ movl %eax, %cr4
+
+ jmp 1f
+1:
+
+ /* Flush the TLB (needed?) */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* Do the copies */
+ cld
+0: /* top, read another word for the indirection page */
+ movl %ebx, %ecx
+ movl (%ebx), %ecx
+ addl $4, %ebx
+ testl $0x1, %ecx /* is it a destination page */
+ jz 1f
+ movl %ecx, %edi
+ andl $0xfffff000, %edi
+ jmp 0b
+1:
+ testl $0x2, %ecx /* is it an indirection page */
+ jz 1f
+ movl %ecx, %ebx
+ andl $0xfffff000, %ebx
+ jmp 0b
+1:
+ testl $0x4, %ecx /* is it the done indicator */
+ jz 1f
+ jmp 2f
+1:
+ testl $0x8, %ecx /* is it the source indicator */
+ jz 0b /* Ignore it otherwise */
+ movl %ecx, %esi /* For every source page do a copy */
+ andl $0xfffff000, %esi
+
+ movl $1024, %ecx
+ rep ; movsl
+ jmp 0b
+
+2:
+
+ /* To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB, it's handy, and not processor dependent.
+ */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* set all of the registers to known values */
+ /* leave %esp alone */
+
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+relocate_new_kernel_end:
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - relocate_new_kernel
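The copy loop in relocate_new_kernel above walks an indirection list of tagged words, testing the low bits 0x1/0x2/0x4/0x8 in that order. In C the same walk looks roughly like this (a sketch using the kexec IND_* tag values; the KPAGE_* names are local to the sketch):

#include <stddef.h>
#include <string.h>

#define IND_DESTINATION 0x1UL  /* set the destination page */
#define IND_INDIRECTION 0x2UL  /* chain to another page of entries */
#define IND_DONE        0x4UL  /* end of the list */
#define IND_SOURCE      0x8UL  /* copy one source page to destination */

#define KPAGE_SIZE 4096UL
#define KPAGE_MASK 0xfffff000UL

/* Mirror of the assembly loop: dest advances one page per copy, just
 * as rep;movsl leaves %edi pointing at the next destination page. */
static void walk_kimage(const unsigned long *entry)
{
	unsigned char *dest = NULL;

	for (;;) {
		unsigned long word = *entry++;
		unsigned long page = word & KPAGE_MASK;

		if (word & IND_DESTINATION)
			dest = (unsigned char *)page;
		else if (word & IND_INDIRECTION)
			entry = (const unsigned long *)page;
		else if (word & IND_DONE)
			break;
		else if (word & IND_SOURCE) {
			memcpy(dest, (const void *)page, KPAGE_SIZE);
			dest += KPAGE_SIZE;
		}
		/* any other word is ignored, as in the assembly */
	}
}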
CONFIG_CKRM_TYPE_TASKCLASS=y
CONFIG_CKRM_RES_NUMTASKS=y
CONFIG_CKRM_CPU_SCHEDULE=y
-CONFIG_CKRM_RES_BLKIO=y
+# CONFIG_CKRM_RES_BLKIO is not set
# CONFIG_CKRM_RES_MEM is not set
# CONFIG_CKRM_TYPE_SOCKETCLASS is not set
CONFIG_CKRM_RBCE=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
CONFIG_REGPARM=y
+CONFIG_IRQSTACKS=y
+CONFIG_STACK_SIZE_SHIFT=13
+CONFIG_STACK_WARN=4000
+CONFIG_X86_STACK_CHECK=y
+CONFIG_STACK_PANIC=512
+CONFIG_KEXEC=y
#
# Power management options (ACPI, APM)
#
# Block devices
#
-# CONFIG_BLK_DEV_FD is not set
+CONFIG_BLK_DEV_FD=m
# CONFIG_BLK_DEV_XD is not set
CONFIG_BLK_CPQ_DA=m
CONFIG_BLK_CPQ_CISS_DA=m
+++ /dev/null
-/*
- * linux/drivers/block/cfq-iosched.c
- *
- * CFQ, or complete fairness queueing, disk scheduler.
- *
- * Based on ideas from a previously unfinished io
- * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
- *
- * Copyright (C) 2003 Jens Axboe <axboe@suse.de>
- */
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/blkdev.h>
-#include <linux/elevator.h>
-#include <linux/bio.h>
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/compiler.h>
-#include <linux/hash.h>
-#include <linux/rbtree.h>
-#include <linux/mempool.h>
-
-/*
- * tunables
- */
-static int cfq_quantum = 4;
-static int cfq_queued = 8;
-
-#define CFQ_QHASH_SHIFT 6
-#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
-#define list_entry_qhash(entry) list_entry((entry), struct cfq_queue, cfq_hash)
-
-#define CFQ_MHASH_SHIFT 8
-#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
-#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
-#define CFQ_MHASH_FN(sec) (hash_long(CFQ_MHASH_BLOCK((sec)),CFQ_MHASH_SHIFT))
-#define ON_MHASH(crq) !list_empty(&(crq)->hash)
-#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
-#define list_entry_hash(ptr) list_entry((ptr), struct cfq_rq, hash)
-
-#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
-
-#define RQ_DATA(rq) ((struct cfq_rq *) (rq)->elevator_private)
-
-static kmem_cache_t *crq_pool;
-static kmem_cache_t *cfq_pool;
-static mempool_t *cfq_mpool;
-
-struct cfq_data {
- struct list_head rr_list;
- struct list_head *dispatch;
- struct list_head *cfq_hash;
-
- struct list_head *crq_hash;
-
- unsigned int busy_queues;
- unsigned int max_queued;
-
- mempool_t *crq_pool;
-};
-
-struct cfq_queue {
- struct list_head cfq_hash;
- struct list_head cfq_list;
- struct rb_root sort_list;
- int pid;
- int queued[2];
-#if 0
- /*
- * with a simple addition like this, we can do io priorities. almost.
- * does need a split request free list, too.
- */
- int io_prio
-#endif
-};
-
-struct cfq_rq {
- struct rb_node rb_node;
- sector_t rb_key;
-
- struct request *request;
-
- struct cfq_queue *cfq_queue;
-
- struct list_head hash;
-};
-
-static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq);
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid);
-static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq);
-
-/*
- * lots of deadline iosched dupes, can be abstracted later...
- */
-static inline void __cfq_del_crq_hash(struct cfq_rq *crq)
-{
- list_del_init(&crq->hash);
-}
-
-static inline void cfq_del_crq_hash(struct cfq_rq *crq)
-{
- if (ON_MHASH(crq))
- __cfq_del_crq_hash(crq);
-}
-
-static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
-{
- cfq_del_crq_hash(crq);
-
- if (q->last_merge == crq->request)
- q->last_merge = NULL;
-}
-
-static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
-{
- struct request *rq = crq->request;
-
- BUG_ON(ON_MHASH(crq));
-
- list_add(&crq->hash, &cfqd->crq_hash[CFQ_MHASH_FN(rq_hash_key(rq))]);
-}
-
-static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
-{
- struct list_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
- struct list_head *entry, *next = hash_list->next;
-
- while ((entry = next) != hash_list) {
- struct cfq_rq *crq = list_entry_hash(entry);
- struct request *__rq = crq->request;
-
- next = entry->next;
-
- BUG_ON(!ON_MHASH(crq));
-
- if (!rq_mergeable(__rq)) {
- __cfq_del_crq_hash(crq);
- continue;
- }
-
- if (rq_hash_key(__rq) == offset)
- return __rq;
- }
-
- return NULL;
-}
-
-/*
- * rb tree support functions
- */
-#define RB_NONE (2)
-#define RB_EMPTY(node) ((node)->rb_node == NULL)
-#define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
-#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
-#define ON_RB(node) ((node)->rb_color != RB_NONE)
-#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
-#define rq_rb_key(rq) (rq)->sector
-
-static inline void cfq_del_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
-{
- if (ON_RB(&crq->rb_node)) {
- cfqq->queued[rq_data_dir(crq->request)]--;
- rb_erase(&crq->rb_node, &cfqq->sort_list);
- crq->cfq_queue = NULL;
- }
-}
-
-static struct cfq_rq *
-__cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
-{
- struct rb_node **p = &cfqq->sort_list.rb_node;
- struct rb_node *parent = NULL;
- struct cfq_rq *__crq;
-
- while (*p) {
- parent = *p;
- __crq = rb_entry_crq(parent);
-
- if (crq->rb_key < __crq->rb_key)
- p = &(*p)->rb_left;
- else if (crq->rb_key > __crq->rb_key)
- p = &(*p)->rb_right;
- else
- return __crq;
- }
-
- rb_link_node(&crq->rb_node, parent, p);
- return 0;
-}
-
-static void
-cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq)
-{
- struct request *rq = crq->request;
- struct cfq_rq *__alias;
-
- crq->rb_key = rq_rb_key(rq);
- cfqq->queued[rq_data_dir(rq)]++;
-retry:
- __alias = __cfq_add_crq_rb(cfqq, crq);
- if (!__alias) {
- rb_insert_color(&crq->rb_node, &cfqq->sort_list);
- crq->cfq_queue = cfqq;
- return;
- }
-
- cfq_del_crq_rb(cfqq, __alias);
- cfq_dispatch_sort(cfqd->dispatch, __alias);
- goto retry;
-}
-
-static struct request *
-cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
-{
- struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
- struct rb_node *n;
-
- if (!cfqq)
- goto out;
-
- n = cfqq->sort_list.rb_node;
- while (n) {
- struct cfq_rq *crq = rb_entry_crq(n);
-
- if (sector < crq->rb_key)
- n = n->rb_left;
- else if (sector > crq->rb_key)
- n = n->rb_right;
- else
- return crq->request;
- }
-
-out:
- return NULL;
-}
-
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct cfq_rq *crq = RQ_DATA(rq);
-
- if (crq) {
- struct cfq_queue *cfqq = crq->cfq_queue;
-
- cfq_remove_merge_hints(q, crq);
- list_del_init(&rq->queuelist);
-
- if (cfqq) {
- cfq_del_crq_rb(cfqq, crq);
-
- if (RB_EMPTY(&cfqq->sort_list))
- cfq_put_queue(cfqd, cfqq);
- }
- }
-}
-
-static int
-cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct request *__rq;
- int ret;
-
- ret = elv_try_last_merge(q, bio);
- if (ret != ELEVATOR_NO_MERGE) {
- __rq = q->last_merge;
- goto out_insert;
- }
-
- __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
- if (__rq) {
- BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_BACK_MERGE;
- goto out;
- }
- }
-
- __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
- if (__rq) {
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_FRONT_MERGE;
- goto out;
- }
- }
-
- return ELEVATOR_NO_MERGE;
-out:
- q->last_merge = __rq;
-out_insert:
- *req = __rq;
- return ret;
-}
-
-static void cfq_merged_request(request_queue_t *q, struct request *req)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct cfq_rq *crq = RQ_DATA(req);
-
- cfq_del_crq_hash(crq);
- cfq_add_crq_hash(cfqd, crq);
-
- if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
- struct cfq_queue *cfqq = crq->cfq_queue;
-
- cfq_del_crq_rb(cfqq, crq);
- cfq_add_crq_rb(cfqd, cfqq, crq);
- }
-
- q->last_merge = req;
-}
-
-static void
-cfq_merged_requests(request_queue_t *q, struct request *req,
- struct request *next)
-{
- cfq_merged_request(q, req);
- cfq_remove_request(q, next);
-}
-
-static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq)
-{
- struct list_head *entry = head;
- struct request *__rq;
-
- if (!list_empty(head)) {
- __rq = list_entry_rq(head->next);
-
- if (crq->request->sector < __rq->sector) {
- entry = head->prev;
- goto link;
- }
- }
-
- while ((entry = entry->prev) != head) {
- __rq = list_entry_rq(entry);
-
- if (crq->request->sector <= __rq->sector)
- break;
- }
-
-link:
- list_add_tail(&crq->request->queuelist, entry);
-}
-
-static inline void
-__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
-{
- struct cfq_rq *crq = rb_entry_crq(rb_first(&cfqq->sort_list));
-
- cfq_del_crq_rb(cfqq, crq);
- cfq_remove_merge_hints(q, crq);
- cfq_dispatch_sort(cfqd->dispatch, crq);
-}
-
-static int cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd)
-{
- struct cfq_queue *cfqq;
- struct list_head *entry, *tmp;
- int ret, queued, good_queues;
-
- if (list_empty(&cfqd->rr_list))
- return 0;
-
- queued = ret = 0;
-restart:
- good_queues = 0;
- list_for_each_safe(entry, tmp, &cfqd->rr_list) {
- cfqq = list_entry_cfqq(cfqd->rr_list.next);
-
- BUG_ON(RB_EMPTY(&cfqq->sort_list));
-
- __cfq_dispatch_requests(q, cfqd, cfqq);
-
- if (RB_EMPTY(&cfqq->sort_list))
- cfq_put_queue(cfqd, cfqq);
- else
- good_queues++;
-
- queued++;
- ret = 1;
- }
-
- if ((queued < cfq_quantum) && good_queues)
- goto restart;
-
- return ret;
-}
-
-static struct request *cfq_next_request(request_queue_t *q)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct request *rq;
-
- if (!list_empty(cfqd->dispatch)) {
- struct cfq_rq *crq;
-dispatch:
- rq = list_entry_rq(cfqd->dispatch->next);
-
- crq = RQ_DATA(rq);
- if (crq)
- cfq_remove_merge_hints(q, crq);
-
- return rq;
- }
-
- if (cfq_dispatch_requests(q, cfqd))
- goto dispatch;
-
- return NULL;
-}
-
-static inline struct cfq_queue *
-__cfq_find_cfq_hash(struct cfq_data *cfqd, int pid, const int hashval)
-{
- struct list_head *hash_list = &cfqd->cfq_hash[hashval];
- struct list_head *entry;
-
- list_for_each(entry, hash_list) {
- struct cfq_queue *__cfqq = list_entry_qhash(entry);
-
- if (__cfqq->pid == pid)
- return __cfqq;
- }
-
- return NULL;
-}
-
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid)
-{
- const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
-
- return __cfq_find_cfq_hash(cfqd, pid, hashval);
-}
-
-static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- cfqd->busy_queues--;
- list_del(&cfqq->cfq_list);
- list_del(&cfqq->cfq_hash);
- mempool_free(cfqq, cfq_mpool);
-}
-
-static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid)
-{
- const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
- struct cfq_queue *cfqq = __cfq_find_cfq_hash(cfqd, pid, hashval);
-
- if (!cfqq) {
- cfqq = mempool_alloc(cfq_mpool, GFP_NOIO);
-
- INIT_LIST_HEAD(&cfqq->cfq_hash);
- INIT_LIST_HEAD(&cfqq->cfq_list);
- RB_CLEAR_ROOT(&cfqq->sort_list);
-
- cfqq->pid = pid;
- cfqq->queued[0] = cfqq->queued[1] = 0;
- list_add(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
- }
-
- return cfqq;
-}
-
-static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
-{
- struct cfq_queue *cfqq;
-
- cfqq = cfq_get_queue(cfqd, current->tgid);
-
- cfq_add_crq_rb(cfqd, cfqq, crq);
-
- if (list_empty(&cfqq->cfq_list)) {
- list_add(&cfqq->cfq_list, &cfqd->rr_list);
- cfqd->busy_queues++;
- }
-}
-
-static void
-cfq_insert_request(request_queue_t *q, struct request *rq, int where)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct cfq_rq *crq = RQ_DATA(rq);
-
- switch (where) {
- case ELEVATOR_INSERT_BACK:
- while (cfq_dispatch_requests(q, cfqd))
- ;
- list_add_tail(&rq->queuelist, cfqd->dispatch);
- break;
- case ELEVATOR_INSERT_FRONT:
- list_add(&rq->queuelist, cfqd->dispatch);
- break;
- case ELEVATOR_INSERT_SORT:
- BUG_ON(!blk_fs_request(rq));
- cfq_enqueue(cfqd, crq);
- break;
- default:
- printk("%s: bad insert point %d\n", __FUNCTION__,where);
- return;
- }
-
- if (rq_mergeable(rq)) {
- cfq_add_crq_hash(cfqd, crq);
-
- if (!q->last_merge)
- q->last_merge = rq;
- }
-}
-
-static int cfq_queue_empty(request_queue_t *q)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
-
- if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list))
- return 1;
-
- return 0;
-}
-
-static struct request *
-cfq_former_request(request_queue_t *q, struct request *rq)
-{
- struct cfq_rq *crq = RQ_DATA(rq);
- struct rb_node *rbprev = rb_prev(&crq->rb_node);
-
- if (rbprev)
- return rb_entry_crq(rbprev)->request;
-
- return NULL;
-}
-
-static struct request *
-cfq_latter_request(request_queue_t *q, struct request *rq)
-{
- struct cfq_rq *crq = RQ_DATA(rq);
- struct rb_node *rbnext = rb_next(&crq->rb_node);
-
- if (rbnext)
- return rb_entry_crq(rbnext)->request;
-
- return NULL;
-}
-
-static int cfq_may_queue(request_queue_t *q, int rw)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct cfq_queue *cfqq;
- int ret = 1;
-
- if (!cfqd->busy_queues)
- goto out;
-
- cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
- if (cfqq) {
- int limit = (q->nr_requests - cfq_queued) / cfqd->busy_queues;
-
- if (limit < 3)
- limit = 3;
- else if (limit > cfqd->max_queued)
- limit = cfqd->max_queued;
-
- if (cfqq->queued[rw] > limit)
- ret = 0;
- }
-out:
- return ret;
-}
-
-static void cfq_put_request(request_queue_t *q, struct request *rq)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct cfq_rq *crq = RQ_DATA(rq);
-
- if (crq) {
- BUG_ON(q->last_merge == rq);
- BUG_ON(ON_MHASH(crq));
-
- mempool_free(crq, cfqd->crq_pool);
- rq->elevator_private = NULL;
- }
-}
-
-static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
-{
- struct cfq_data *cfqd = q->elevator.elevator_data;
- struct cfq_rq *crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
-
- if (crq) {
- RB_CLEAR(&crq->rb_node);
- crq->request = rq;
- crq->cfq_queue = NULL;
- INIT_LIST_HEAD(&crq->hash);
- rq->elevator_private = crq;
- return 0;
- }
-
- return 1;
-}
-
-static void cfq_exit(request_queue_t *q, elevator_t *e)
-{
- struct cfq_data *cfqd = e->elevator_data;
-
- e->elevator_data = NULL;
- mempool_destroy(cfqd->crq_pool);
- kfree(cfqd->crq_hash);
- kfree(cfqd->cfq_hash);
- kfree(cfqd);
-}
-
-static int cfq_init(request_queue_t *q, elevator_t *e)
-{
- struct cfq_data *cfqd;
- int i;
-
- cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
- if (!cfqd)
- return -ENOMEM;
-
- memset(cfqd, 0, sizeof(*cfqd));
- INIT_LIST_HEAD(&cfqd->rr_list);
-
- cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
- if (!cfqd->crq_hash)
- goto out_crqhash;
-
- cfqd->cfq_hash = kmalloc(sizeof(struct list_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
- if (!cfqd->cfq_hash)
- goto out_cfqhash;
-
- cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
- if (!cfqd->crq_pool)
- goto out_crqpool;
-
- for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
- INIT_LIST_HEAD(&cfqd->crq_hash[i]);
- for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
- INIT_LIST_HEAD(&cfqd->cfq_hash[i]);
-
- cfqd->dispatch = &q->queue_head;
- e->elevator_data = cfqd;
-
- /*
- * just set it to some high value, we want anyone to be able to queue
- * some requests. fairness is handled differently
- */
- cfqd->max_queued = q->nr_requests;
- q->nr_requests = 8192;
-
- return 0;
-out_crqpool:
- kfree(cfqd->cfq_hash);
-out_cfqhash:
- kfree(cfqd->crq_hash);
-out_crqhash:
- kfree(cfqd);
- return -ENOMEM;
-}
-
-static int __init cfq_slab_setup(void)
-{
- crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
- NULL, NULL);
-
- if (!crq_pool)
- panic("cfq_iosched: can't init crq pool\n");
-
- cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
- NULL, NULL);
-
- if (!cfq_pool)
- panic("cfq_iosched: can't init cfq pool\n");
-
- cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool);
-
- if (!cfq_mpool)
- panic("cfq_iosched: can't init cfq mpool\n");
-
- return 0;
-}
-
-subsys_initcall(cfq_slab_setup);
-
-elevator_t iosched_cfq = {
- .elevator_name = "cfq",
- .elevator_merge_fn = cfq_merge,
- .elevator_merged_fn = cfq_merged_request,
- .elevator_merge_req_fn = cfq_merged_requests,
- .elevator_next_req_fn = cfq_next_request,
- .elevator_add_req_fn = cfq_insert_request,
- .elevator_remove_req_fn = cfq_remove_request,
- .elevator_queue_empty_fn = cfq_queue_empty,
- .elevator_former_req_fn = cfq_former_request,
- .elevator_latter_req_fn = cfq_latter_request,
- .elevator_set_req_fn = cfq_set_request,
- .elevator_put_req_fn = cfq_put_request,
- .elevator_may_queue_fn = cfq_may_queue,
- .elevator_init_fn = cfq_init,
- .elevator_exit_fn = cfq_exit,
-};
-
-EXPORT_SYMBOL(iosched_cfq);
#error Cannot support this many io priority levels
#endif
-#define LIMIT_DEBUG 1
-
/*
* tunables
*/
static int cfq_grace_rt = HZ / 100 ?: 1;
static int cfq_grace_idle = HZ / 10;
+#define CFQ_EPOCH 1000000000
+#define CFQ_SECTORATE 1000
+#define CFQ_HMAX_PCT 80
+
#define CFQ_QHASH_SHIFT 6
#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
#define cfq_account_io(crq) \
((crq)->ioprio != IOPRIO_IDLE && (crq)->ioprio != IOPRIO_RT)
-/* define to be 50 ms for now; make tunable later */
-#define CFQ_EPOCH 50000
-/* Needs to be made tunable right away, in MiB/s */
-#define CFQ_DISKBW 10
-/* Temporary global limit, as percent of available b/w, for each "class" */
-#define CFQ_TEMPLIM 10
-
/*
* defines how we distribute bandwidth (can be tgid, uid, etc)
*/
*/
#if defined(CONFIG_CKRM_RES_BLKIO) || defined(CONFIG_CKRM_RES_BLKIO_MODULE)
-extern inline void *cki_hash_key(struct task_struct *tsk);
-extern inline int cki_ioprio(struct task_struct *tsk);
-#define cfq_hash_key(current) ((int)cki_hash_key((current)))
-#define cfq_ioprio(current) (cki_ioprio((current)))
+extern void *cki_hash_key(struct task_struct *tsk);
+extern int cki_ioprio(struct task_struct *tsk);
+extern void *cki_cfqpriv(struct task_struct *tsk);
+
+#define cfq_hash_key(tsk) ((int)cki_hash_key((tsk)))
+#define cfq_ioprio(tsk) (cki_ioprio((tsk)))
+#define cfq_cfqpriv(cfqd,tsk) (cki_cfqpriv((tsk)))
#else
-#define cfq_hash_key(current) ((current)->tgid)
+#define cfq_hash_key(tsk) ((tsk)->tgid)
+#define cfq_cfqpriv(cfqd,tsk) (&(((cfqd)->cid[(tsk)->ioprio]).cfqpriv))
/*
* move to io_context
*/
-#define cfq_ioprio(current) ((current)->ioprio)
+#define cfq_ioprio(tsk) ((tsk)->ioprio)
#endif
#define CFQ_WAIT_RT 0
atomic_t cum_sectors_in,cum_sectors_out;
atomic_t cum_queues_in,cum_queues_out;
-#ifdef LIMIT_DEBUG
- int nskip;
- unsigned long navsec;
- unsigned long csectorate;
- unsigned long lsectorate;
-#endif
+ cfqlim_t cfqpriv; /* data for enforcing limits */
struct list_head prio_list;
int last_rq;
int last_sectors;
+
};
/*
unsigned int cfq_grace_rt;
unsigned int cfq_grace_idle;
- unsigned long cfq_epoch; /* duration for limit enforcement */
- unsigned long cfq_epochsectors; /* max sectors dispatchable/epoch */
+ unsigned int cfq_epoch;
+ unsigned int cfq_hmax_pct;
+ unsigned int cfq_qsectorate;
};
/*
int queued[2];
int ioprio;
+ /* limit related settings/stats obtained
+ either from io_prio_data or ckrm I/O class
+ */
+ struct cfqlim *cfqpriv;
+
+ u64 epstart; /* current epoch's starting timestamp (ns) */
+ u64 epsector[2]; /* Total sectors dispatched in [0] previous
+ * and [1] current epoch
+ */
+
unsigned long avsec; /* avg sectors dispatched/epoch */
- unsigned long long lastime; /* timestamp of last request served */
- unsigned long sectorate; /* limit for sectors served/epoch */
+// unsigned long long lastime; /* timestamp of last request served */
+// unsigned long sectorate; /* limit for sectors served/epoch */
int skipped; /* queue skipped at last dispatch ? */
+
+ /* Per queue timer to suspend/resume queue from processing */
+ struct timer_list timer;
+ unsigned long wait_end;
+ unsigned long flags;
+ struct work_struct work;
+
+ struct cfq_data *cfqd;
};
+
+
/*
- * per-request structure
+ * Per-request structure
*/
struct cfq_rq {
struct cfq_queue *cfq_queue;
list_add_tail(&crq->request->queuelist, entry);
}
-/*
- * remove from io scheduler core and put on dispatch list for service
- */
+struct cfq_queue *dcfqq;
+u64 dtmp;
+
+
+
+/* Over how many ns is sectorate defined */
+#define NS4SCALE (100000000)
+
static inline int
-__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
+__cfq_check_limit(struct cfq_data *cfqd,struct cfq_queue *cfqq, int dontskip)
{
struct cfq_rq *crq;
- unsigned long long ts, gap;
- unsigned long newavsec;
+ unsigned long long ts, gap, epoch, tmp;
+ unsigned long newavsec, sectorate;
crq = rb_entry_crq(rb_first(&cfqq->sort_list));
-#if 1
- /* Determine if queue should be skipped for being overshare */
ts = sched_clock();
- gap = ts - cfqq->lastime;
-#ifdef LIMIT_DEBUG
- cfqq->sectorate = (cfqd->cfq_epochsectors
- * CFQ_TEMPLIM)/100;
-
-#endif
- if ((gap >= cfqd->cfq_epoch) || (gap < 0)) {
- cfqq->avsec = crq->nr_sectors ;
- cfqq->lastime = ts;
+ gap = ts - cfqq->epstart;
+ epoch = cfqd->cfq_epoch;
+
+ sectorate = atomic_read(&cfqq->cfqpriv->sectorate);
+// sectorate = atomic_read(&(cfqd->cid[crq->ioprio].sectorate));
+
+ dcfqq = cfqq;
+
+ if ((gap >= epoch) || (gap < 0)) {
+
+ if (gap >= (epoch << 1)) {
+ cfqq->epsector[0] = 0;
+ cfqq->epstart = ts ;
+ } else {
+ cfqq->epsector[0] = cfqq->epsector[1];
+ cfqq->epstart += epoch;
+ }
+ cfqq->epsector[1] = 0;
+ gap = ts - cfqq->epstart;
+
+ tmp = (cfqq->epsector[0] + crq->nr_sectors) * NS4SCALE;
+ do_div(tmp,epoch+gap);
+
+ cfqq->avsec = (unsigned long)tmp;
+ cfqq->skipped = 0;
+ cfqq->epsector[1] += crq->nr_sectors;
+
+ cfqq->cfqpriv->navsec = cfqq->avsec;
+ cfqq->cfqpriv->sec[0] = cfqq->epsector[0];
+ cfqq->cfqpriv->sec[1] = cfqq->epsector[1];
+ cfqq->cfqpriv->timedout++;
+ /*
+ cfqd->cid[crq->ioprio].navsec = cfqq->avsec;
+ cfqd->cid[crq->ioprio].sec[0] = cfqq->epsector[0];
+ cfqd->cid[crq->ioprio].sec[1] = cfqq->epsector[1];
+ cfqd->cid[crq->ioprio].timedout++;
+ */
+ return 0;
} else {
- u64 tmp;
- /* Age old average and accumalate request to be served */
-
-// tmp = (u64) (cfqq->avsec * gap) ;
-// do_div(tmp, cfqd->cfq_epoch);
- newavsec = (unsigned long)(cfqq->avsec >> 1) + crq->nr_sectors;
-// if (crq->ioprio >= 0 && crq->ioprio <= 20)
-// cfqd->cid[crq->ioprio].lsectorate = newavsec;
-// atomic_set(&(cfqd->cid[crq->ioprio].lsectorate),
-// newavsec);
-
- if ((newavsec < cfqq->sectorate) || cfqq->skipped) {
+
+ tmp = (cfqq->epsector[0] + cfqq->epsector[1] + crq->nr_sectors)
+ * NS4SCALE;
+ do_div(tmp,epoch+gap);
+
+ newavsec = (unsigned long)tmp;
+ if ((newavsec < sectorate) || dontskip) {
cfqq->avsec = newavsec ;
- cfqq->lastime = ts;
cfqq->skipped = 0;
+ cfqq->epsector[1] += crq->nr_sectors;
+ cfqq->cfqpriv->navsec = cfqq->avsec;
+ cfqq->cfqpriv->sec[1] = cfqq->epsector[1];
+ /*
+ cfqd->cid[crq->ioprio].navsec = cfqq->avsec;
+ cfqd->cid[crq->ioprio].sec[1] = cfqq->epsector[1];
+ */
} else {
- /* queue over share ; skip once */
cfqq->skipped = 1;
-#ifdef LIMIT_DEBUG
-// atomic_inc(&(cfqd->cid[crq->ioprio].nskip));
-// if (crq->ioprio >= 0 && crq->ioprio <= 20)
-// cfqd->cid[crq->ioprio].nskip++;
-#endif
- return 0;
+			/* pause q's processing until avsec drops to
+			   cfq_hmax_pct % of its value */
+ tmp = (epoch+gap) * (100-cfqd->cfq_hmax_pct);
+ do_div(tmp,1000000*cfqd->cfq_hmax_pct);
+ cfqq->wait_end = jiffies+msecs_to_jiffies(tmp);
}
- }
-#endif
+	}
+	return 0;
+}
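+
+/* Worked example of the scaling in __cfq_check_limit (hypothetical
+ * numbers): over a window of epoch+gap = 200 ms = 2*10^8 ns in which
+ * the queue moved 2048 sectors,
+ *	avsec = 2048 * NS4SCALE / (2*10^8) = 1024 sectors per 10^8 ns.
+ * Against the default sectorate of CFQ_SECTORATE (1000), such a queue
+ * would be marked skipped at its next dispatch.
+ */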
-#ifdef LIMIT_DEBUG
-// if (crq->ioprio >= 0 && crq->ioprio <= 20) {
-// cfqd->cid[crq->ioprio].navsec = cfqq->avsec;
-// cfqd->cid[crq->ioprio].csectorate = cfqq->sectorate;
-// }
+/*
+ * remove from io scheduler core and put on dispatch list for service
+ */
+static inline int
+__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
+ struct cfq_queue *cfqq)
+{
+ struct cfq_rq *crq;
+
+ crq = rb_entry_crq(rb_first(&cfqq->sort_list));
-// atomic_set(&(cfqd->cid[crq->ioprio].navsec),cfqq->avsec);
-// atomic_set(&(cfqd->cid[crq->ioprio].csectorate),cfqq->sectorate);
-#endif
cfq_dispatch_sort(cfqd, cfqq, crq);
/*
{
struct cfq_data *cfqd = q->elevator.elevator_data;
struct list_head *plist = &cfqd->cid[prio].rr_list;
+ struct cfq_queue *cfqq;
struct list_head *entry, *nxt;
int q_rq, q_io;
- int ret ;
+ int first_round,busy_queues,busy_unlimited;
+
/*
* for each queue at this prio level, dispatch a request
*/
q_rq = q_io = 0;
+ first_round=1;
+ restart:
+ busy_unlimited = 0;
+ busy_queues = 0;
list_for_each_safe(entry, nxt, plist) {
- struct cfq_queue *cfqq = list_entry_cfqq(entry);
+ cfqq = list_entry_cfqq(entry);
BUG_ON(RB_EMPTY(&cfqq->sort_list));
+ busy_queues++;
- ret = __cfq_dispatch_requests(q, cfqd, cfqq);
- if (ret <= 0) {
- continue; /* skip queue */
- /* can optimize more by moving q to end of plist ? */
+
+ if (first_round || busy_unlimited)
+ __cfq_check_limit(cfqd,cfqq,0);
+ else
+ __cfq_check_limit(cfqd,cfqq,1);
+
+ if (cfqq->skipped) {
+ cfqq->cfqpriv->nskip++;
+ /* cfqd->cid[prio].nskip++; */
+ busy_queues--;
+ if (time_before(jiffies, cfqq->wait_end)) {
+ list_del(&cfqq->cfq_list);
+ mod_timer(&cfqq->timer,cfqq->wait_end);
+ }
+ continue;
}
- q_io += ret ;
- q_rq++ ;
+ busy_unlimited++;
+
+ q_io += __cfq_dispatch_requests(q, cfqd, cfqq);
+ q_rq++;
- if (RB_EMPTY(&cfqq->sort_list))
+ if (RB_EMPTY(&cfqq->sort_list)) {
+ busy_unlimited--;
+ busy_queues--;
cfq_put_queue(cfqd, cfqq);
- /*
- * if we hit the queue limit, put the string of serviced
- * queues at the back of the pending list
- */
+ }
+
if (q_io >= max_sectors || q_rq >= max_rq) {
+#if 0
struct list_head *prv = nxt->prev;
if (prv != plist) {
list_del(plist);
list_add(plist, prv);
}
+#endif
break;
}
}
+ if ((q_io < max_sectors) && (q_rq < max_rq) &&
+ (busy_queues || first_round))
+ {
+ first_round = 0;
+ goto restart;
+ } else {
+ /*
+ * if we hit the queue limit, put the string of serviced
+ * queues at the back of the pending list
+ */
+ struct list_head *prv = nxt->prev;
+ if (prv != plist) {
+ list_del(plist);
+ list_add(plist, prv);
+ }
+ }
+
cfqd->cid[prio].last_rq = q_rq;
cfqd->cid[prio].last_sectors = q_io;
return q_rq;
mempool_free(cfqq, cfq_mpool);
}
+static void cfq_pauseq_timer(unsigned long data)
+{
+ struct cfq_queue *cfqq = (struct cfq_queue *) data;
+ kblockd_schedule_work(&cfqq->work);
+}
+
+static void cfq_pauseq_work(void *data)
+{
+ struct cfq_queue *cfqq = (struct cfq_queue *) data;
+ struct cfq_data *cfqd = cfqq->cfqd;
+ request_queue_t *q = cfqd->queue;
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ list_add_tail(&cfqq->cfq_list,&cfqd->cid[cfqq->ioprio].rr_list);
+ cfqq->skipped = 0;
+ if (cfq_next_request(q))
+ q->request_fn(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ //del_timer(&cfqq->timer);
+}
+
static struct cfq_queue *__cfq_get_queue(struct cfq_data *cfqd, int hashkey,
int gfp_mask)
{
INIT_LIST_HEAD(&cfqq->cfq_list);
cfqq->hash_key = cfq_hash_key(current);
cfqq->ioprio = cfq_ioprio(current);
- cfqq->avsec = 0 ;
- cfqq->lastime = sched_clock();
- cfqq->sectorate = (cfqd->cfq_epochsectors * CFQ_TEMPLIM)/100;
+
+ cfqq->cfqpriv = cfq_cfqpriv(cfqd,current);
+ if (!cfqq->cfqpriv)
+ cfqq->cfqpriv = &((cfqd->cid[cfqq->ioprio]).cfqpriv);
+
+ cfqq->epstart = sched_clock();
+ /* epsector, avsec, skipped initialized to zero by memset */
+
+ init_timer(&cfqq->timer);
+ cfqq->timer.function = cfq_pauseq_timer;
+ cfqq->timer.data = (unsigned long) cfqq;
+
+ INIT_WORK(&cfqq->work, cfq_pauseq_work, cfqq);
+
+ cfqq->cfqd = cfqd ;
+
hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
}
kfree(cfqd);
}
+
+
static void cfq_timer(unsigned long data)
{
struct cfq_data *cfqd = (struct cfq_data *) data;
atomic_set(&cid->cum_sectors_out,0);
atomic_set(&cid->cum_queues_in,0);
atomic_set(&cid->cum_queues_out,0);
-#if 0
- atomic_set(&cid->nskip,0);
- atomic_set(&cid->navsec,0);
- atomic_set(&cid->csectorate,0);
- atomic_set(&cid->lsectorate,0);
-#endif
+
+
+ atomic_set(&((cid->cfqpriv).sectorate),CFQ_SECTORATE);
+ (cid->cfqpriv).nskip = 0;
+ (cid->cfqpriv).navsec = 0;
+ (cid->cfqpriv).timedout = 0;
}
cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES,
cfqd->cfq_idle_quantum_io = cfq_idle_quantum_io;
cfqd->cfq_grace_rt = cfq_grace_rt;
cfqd->cfq_grace_idle = cfq_grace_idle;
+
+ cfqd->cfq_epoch = CFQ_EPOCH;
+ cfqd->cfq_hmax_pct = CFQ_HMAX_PCT;
q->nr_requests <<= 2;
e->elevator_data = cfqd;
cfqd->queue = q;
- cfqd->cfq_epoch = CFQ_EPOCH;
- if (q->hardsect_size)
- cfqd->cfq_epochsectors = ((CFQ_DISKBW * 1000000)/
- q->hardsect_size)* (1000000 / CFQ_EPOCH);
- else
- cfqd->cfq_epochsectors = ((CFQ_DISKBW * 1000000)/512)
- * (1000000 / CFQ_EPOCH) ;
-
return 0;
out_crqpool:
kfree(cfqd->cfq_hash);
SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued);
SHOW_FUNCTION(cfq_grace_rt_show, cfqd->cfq_grace_rt);
SHOW_FUNCTION(cfq_grace_idle_show, cfqd->cfq_grace_idle);
+SHOW_FUNCTION(cfq_epoch_show, cfqd->cfq_epoch);
+SHOW_FUNCTION(cfq_hmax_pct_show, cfqd->cfq_hmax_pct);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, INT_MAX);
STORE_FUNCTION(cfq_grace_rt_store, &cfqd->cfq_grace_rt, 0, INT_MAX);
STORE_FUNCTION(cfq_grace_idle_store, &cfqd->cfq_grace_idle, 0, INT_MAX);
+STORE_FUNCTION(cfq_epoch_store, &cfqd->cfq_epoch, 0, INT_MAX);
+STORE_FUNCTION(cfq_hmax_pct_store, &cfqd->cfq_hmax_pct, 1, 100);
#undef STORE_FUNCTION
-static ssize_t cfq_epoch_show(struct cfq_data *cfqd, char *page)
-{
- return sprintf(page, "%lu\n", cfqd->cfq_epoch);
-}
-
-static ssize_t cfq_epoch_store(struct cfq_data *cfqd, const char *page, size_t count)
-{
- char *p = (char *) page;
- cfqd->cfq_epoch = simple_strtoul(p, &p, 10);
- return count;
-}
-
-static ssize_t cfq_epochsectors_show(struct cfq_data *cfqd, char *page)
-{
- return sprintf(page, "%lu\n", cfqd->cfq_epochsectors);
-}
-
-static ssize_t
-cfq_epochsectors_store(struct cfq_data *cfqd, const char *page, size_t count)
-{
- char *p = (char *) page;
- cfqd->cfq_epochsectors = simple_strtoul(p, &p, 10);
- return count;
-}
-
/* Additional entries to get priority level data */
static ssize_t
cfq_prio_show(struct cfq_data *cfqd, char *page, unsigned int priolvl)
{
- int r1,r2,s1,s2,q1,q2;
+ //int r1,r2,s1,s2,q1,q2;
if (!(priolvl >= IOPRIO_IDLE && priolvl <= IOPRIO_RT))
return 0;
+ /*
r1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_in));
r2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_out));
s1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_in));
s2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_out));
q1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_in));
q2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_out));
-
- return sprintf(page,"skip %d avsec %lu rate %lu new %lu"
- "rq (%d,%d) sec (%d,%d) q (%d,%d)\n",
- cfqd->cid[priolvl].nskip,
- cfqd->cid[priolvl].navsec,
- cfqd->cid[priolvl].csectorate,
- cfqd->cid[priolvl].lsectorate,
-// atomic_read(&cfqd->cid[priolvl].nskip),
-// atomic_read(&cfqd->cid[priolvl].navsec),
-// atomic_read(&cfqd->cid[priolvl].csectorate),
-// atomic_read(&cfqd->cid[priolvl].lsectorate),
- r1,r2,
- s1,s2,
- q1,q2);
+ */
+
+ return sprintf(page,"skip %d timdout %d avsec %lu rate %ld "
+ " sec0 %lu sec1 %lu\n",
+ cfqd->cid[priolvl].cfqpriv.nskip,
+ cfqd->cid[priolvl].cfqpriv.timedout,
+ cfqd->cid[priolvl].cfqpriv.navsec,
+ atomic_read(&(cfqd->cid[priolvl].cfqpriv.sectorate)),
+ (unsigned long)cfqd->cid[priolvl].cfqpriv.sec[0],
+ (unsigned long)cfqd->cid[priolvl].cfqpriv.sec[1]);
+
}
#define SHOW_PRIO_DATA(__PRIOLVL) \
static ssize_t cfq_prio_store(struct cfq_data *cfqd, const char *page, size_t count, int priolvl)
{
+
+ char *p = (char *) page;
+ int val;
+
+ val = (int) simple_strtoul(p, &p, 10);
+
+ atomic_set(&(cfqd->cid[priolvl].cfqpriv.sectorate),val);
+ cfqd->cid[priolvl].cfqpriv.nskip = 0;
+ cfqd->cid[priolvl].cfqpriv.navsec = 0;
+ cfqd->cid[priolvl].cfqpriv.timedout = 0;
+
+#if 0
atomic_set(&(cfqd->cid[priolvl].cum_rq_in),0);
atomic_set(&(cfqd->cid[priolvl].cum_rq_out),0);
atomic_set(&(cfqd->cid[priolvl].cum_sectors_in),0);
atomic_set(&(cfqd->cid[priolvl].cum_sectors_out),0);
atomic_set(&(cfqd->cid[priolvl].cum_queues_in),0);
atomic_set(&(cfqd->cid[priolvl].cum_queues_out),0);
+#endif
return count;
}
.show = cfq_epoch_show,
.store = cfq_epoch_store,
};
-static struct cfq_fs_entry cfq_epochsectors_entry = {
- .attr = {.name = "epochsectors", .mode = S_IRUGO | S_IWUSR },
- .show = cfq_epochsectors_show,
- .store = cfq_epochsectors_store,
+static struct cfq_fs_entry cfq_hmax_pct_entry = {
+ .attr = {.name = "hmaxpct", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_hmax_pct_show,
+ .store = cfq_hmax_pct_store,
};
#define P_0_STR "p0"
&cfq_grace_rt_entry.attr,
&cfq_grace_idle_entry.attr,
&cfq_epoch_entry.attr,
- &cfq_epochsectors_entry.attr,
+ &cfq_hmax_pct_entry.attr,
&cfq_prio_0_entry.attr,
&cfq_prio_1_entry.attr,
&cfq_prio_2_entry.attr,
#include <linux/ckrm_tc.h>
#include <linux/ckrm-io.h>
-/* Tie to cfq priorities */
-#define CKI_IOPRIO_NORM IOPRIO_NORM
+/* sectorate == 512-byte sectors served in CFQ_EPOCH ns */
-/* Divisor to get fraction of bandwidth represented by an IOPRIO value */
-/* FIXME: Will not work if IOPRIO_NR > 100 */
-#define CKI_IOPRIO_DIV (IOPRIO_NR-1)
-/* Minimum ioprio value to be assigned to a class */
-#define CKI_IOPRIO_MIN 1
+/* CKI_ROOTSECTORATE needs to be made configurable from outside */
+#define CKI_ROOTSECTORATE 100000
+#define CKI_MINSECTORATE 100
#define CKI_IOUSAGE_UNIT 512
unsigned long blksz; /* size of bandwidth unit */
atomic_t blkrd; /* read units submitted to DD */
atomic_t blkwr; /* write units submitted to DD */
-
+
+ int nskip; /* # times q skipped */
+ unsigned long navsec; /* avg sectors serviced */
+ int timedout; /* # times gap > epoch */
+ u64 sec[2]; /* sectors serviced in
+ prev & curr epochs */
} cki_stats_t; /* per class I/O statistics */
/* Note
* in local units.
*/
+ cfqlim_t cfqpriv; /* Data common with cfq priolvl's */
+
+
int cnt_guarantee; /* Allocation as parent */
int cnt_unused; /* Allocation to default subclass */
+ int cnt_limit;
/* Statistics, for class and default subclass */
cki_stats_t stats;
} cki_icls_t;
-
/* Internal functions */
static inline void cki_reset_stats(cki_stats_t *usg);
static inline void init_icls_one(cki_icls_t *icls);
-static inline int cki_div(int *a, int b, int c);
-//static inline int cki_recalc(cki_icls_t *icls, int rel2abs);
static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres);
/* External functions e.g. interface to ioscheduler */
void *cki_tsk_icls (struct task_struct *tsk);
int cki_tsk_ioprio (struct task_struct *tsk);
-extern void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio);
+extern void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio, icls_tsk_t tskcfqpriv);
/* CKRM Resource Controller API functions */
static void * cki_alloc(struct ckrm_core_class *this,
static inline void init_icls_one(cki_icls_t *icls)
{
- // Assign zero as initial guarantee otherwise creations
- // could fail due to inadequate share
-
- //icls->shares.my_guarantee =
- // (CKI_IOPRIO_MIN * CKRM_SHARE_DFLT_TOTAL_GUARANTEE) /
- // CKI_IOPRIO_DIV ;
- icls->shares.my_guarantee = 0;
- icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+	/* Start with a zero initial guarantee so that creating
+	   many classes cannot fail for lack of unallocated share */
- icls->shares.unused_guarantee = icls->shares.total_guarantee -
- icls->shares.my_guarantee;
- icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-
-
- icls->cnt_guarantee = icls->cnt_unused = IOPRIO_IDLE;
+	/* Defaults: no guarantee or limit until shares are explicitly set */
+
+ icls->shares.my_guarantee = CKRM_SHARE_DONTCARE;
+ icls->shares.my_limit = CKRM_SHARE_DONTCARE;
+ icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+ icls->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
+ icls->shares.unused_guarantee = icls->shares.total_guarantee;
+ icls->shares.cur_max_limit = 0;
- //Same rationale icls->ioprio = CKI_IOPRIO_MIN;
- //IOPRIO_IDLE equivalence to zero my_guarantee (set above) relies
- //on former being zero.
+ icls->cnt_guarantee = CKRM_SHARE_DONTCARE;
+ icls->cnt_unused = CKRM_SHARE_DONTCARE;
+ icls->cnt_limit = CKRM_SHARE_DONTCARE;
init_icls_stats(icls);
}
-
-static inline int cki_div(int *a, int b, int c)
-{
- u64 temp = (u64) b * c ;
- do_div(temp,CKI_IOPRIO_DIV);
- *a = (int) temp;
-
- return 0;
-}
-
-
-/* Recalculate absolute shares from relative (rel2abs=1)
- * or vice versa (rel2abs=0)
- * Caller should have a lock on icls
+/* Recalculate absolute shares from relative
+ * Caller should hold a lock on icls
*/
static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres)
ckrm_core_class_t *child = NULL;
cki_icls_t *childres;
int resid = cki_rcbs.resid;
+ u64 temp;
if (parres) {
struct ckrm_shares *par = &parres->shares;
struct ckrm_shares *self = &res->shares;
-
if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
res->cnt_guarantee = CKRM_SHARE_DONTCARE;
} else if (par->total_guarantee) {
- u64 temp = (u64) self->my_guarantee *
+ temp = (u64) self->my_guarantee *
parres->cnt_guarantee;
do_div(temp, par->total_guarantee);
res->cnt_guarantee = (int) temp;
res->cnt_guarantee = 0;
}
+
+ if (parres->cnt_limit == CKRM_SHARE_DONTCARE) {
+ res->cnt_limit = CKRM_SHARE_DONTCARE;
+ atomic_set(&res->cfqpriv.sectorate,CKI_MINSECTORATE);
+ } else {
+ if (par->max_limit) {
+ temp = (u64) self->my_limit *
+ parres->cnt_limit;
+ do_div(temp, par->max_limit);
+ res->cnt_limit = (int) temp;
+ } else {
+ res->cnt_limit = 0;
+ }
+ atomic_set(&res->cfqpriv.sectorate,res->cnt_limit);
+ }
+
if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) {
res->cnt_unused = CKRM_SHARE_DONTCARE;
- } else if (self->total_guarantee) {
- u64 temp = (u64) self->unused_guarantee *
- res->cnt_guarantee;
- do_div(temp, self->total_guarantee);
- res->cnt_unused = (int) temp;
} else {
- res->cnt_unused = 0;
+ if (self->total_guarantee) {
+ temp = (u64) self->unused_guarantee *
+ res->cnt_guarantee;
+ do_div(temp, self->total_guarantee);
+ res->cnt_unused = (int) temp;
+ } else {
+ res->cnt_unused = 0;
+ }
+
}
+
}
// propagate to children
ckrm_lock_hier(res->core);
ckrm_unlock_hier(res->core);
}
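
The limit path above parallels the guarantee path: a child's absolute limit is
its my_limit fraction of the parent's cnt_limit, scaled by the parent's
max_limit, and the result seeds the class's sectorate. A minimal user-space
sketch of that arithmetic, using hypothetical numbers (CKI_ROOTSECTORATE
standing in for the parent's cnt_limit):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long temp;
		int parent_cnt_limit = 100000;	/* CKI_ROOTSECTORATE */
		int par_max_limit = 100;	/* parent's total limit units */
		int my_limit = 30;		/* this class's limit units */

		/* res->cnt_limit = my_limit * parent_cnt_limit / max_limit */
		temp = (unsigned long long) my_limit * parent_cnt_limit;
		temp /= par_max_limit;
		printf("cnt_limit = %llu sectors/epoch\n", temp);	/* 30000 */
		return 0;
	}
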
-#if 0
-static inline int cki_recalc(cki_icls_t *icls, int rel2abs)
-{
- u64 temp;
-
- if (icls->parent == NULL) {
- /* Root, as parent, always gets all */
-
- temp = icls->shares.my_guarantee * (IOPRIO_NR-1);
- do_div(temp, icls->shares.total_guarantee);
-
- icls->total = IOPRIO_NR-1;
- icls->ioprio = temp ;
- icls->unused = icls->total - icls->ioprio;
-// icls->unused = (IOPRIO_NR-1)-icls->ioprio;
-
- } else {
- cki_icls_t *parres;
- int partot ;
-
- parres = ckrm_get_res_class(icls->parent,
- cki_rcbs.resid,
- cki_icls_t);
- if (!parres) {
- printk(KERN_ERR "cki_recalc: error getting "
- "resclass from core \n");
- return -EINVAL;
- }
-
-
- temp = (icls->shares.my_guarantee *
- parres->total);
- do_div(temp, parres->shares.total_guarantee);
-
- icls->ioprio = temp;
- icls->unused = 0;
-
- }
-
- return 0;
-
-}
-#endif
-
void *cki_tsk_icls(struct task_struct *tsk)
{
return (void *) ckrm_get_res_class(class_core(tsk->taskclass),
}
int cki_tsk_ioprio(struct task_struct *tsk)
+{
+ /* Don't use I/O priorities for now */
+ return IOPRIO_NORM;
+}
+
+void *cki_tsk_cfqpriv(struct task_struct *tsk)
{
cki_icls_t *icls = ckrm_get_res_class(class_core(tsk->taskclass),
cki_rcbs.resid, cki_icls_t);
- return icls->cnt_unused;
+ return (void *)&(icls->cfqpriv);
}
+
static void *cki_alloc(struct ckrm_core_class *core,
struct ckrm_core_class *parent)
{
icls->parent = parent;
icls->shares_lock = SPIN_LOCK_UNLOCKED;
- if (parent == NULL) {
-
- /* Root class gets same as "normal" CFQ priorities to
- * retain compatibility of behaviour in the absence of
- * other classes
- */
-
- icls->cnt_guarantee = icls->cnt_unused = IOPRIO_NR-1;
-
- /* Default gets normal, not minimum */
- //icls->unused = IOPRIO_NORM;
- //icls->unused = icls->guarantee-icls->myguarantee;
- //icls->limit = icls->mylimit = IOPRIO_NR;
-
- /* Compute shares in abstract units */
- icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-
- // my_guarantee for root is meaningless. Set to default
- icls->shares.my_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+ init_icls_one(icls);
- icls->shares.unused_guarantee =
- CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-
- //temp = (u64) icls->cnt_unused * icls->shares.total_guarantee;
- //do_div(temp, CKI_IOPRIO_DIV);
- // temp now has root's default's share
- //icls->shares.unused_guarantee =
- // icls->shares.total_guarantee - temp;
-
- icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-
- } else {
- init_icls_one(icls);
- /* No propagation to parent needed if icls'
- initial share is zero */
+ if (parent == NULL) {
+ icls->cnt_guarantee = CKI_ROOTSECTORATE;
+ icls->cnt_unused = CKI_ROOTSECTORATE;
+ icls->cnt_limit = CKI_ROOTSECTORATE;
+ atomic_set(&(icls->cfqpriv.sectorate),icls->cnt_limit);
}
try_module_get(THIS_MODULE);
return icls;
static void cki_free(void *res)
{
- cki_icls_t *icls = res, *parres;
+ cki_icls_t *icls = res, *parres, *childres;
+ ckrm_core_class_t *child = NULL;
+ int maxlimit, resid = cki_rcbs.resid;
+
if (!res)
return;
*
*/
- parres = ckrm_get_res_class(icls->parent,
- cki_rcbs.resid,
- cki_icls_t);
+ parres = ckrm_get_res_class(icls->parent, resid, cki_icls_t);
if (!parres) {
printk(KERN_ERR "cki_free: error getting "
"resclass from core \n");
/* Update parent's shares */
spin_lock(&parres->shares_lock);
+
child_guarantee_changed(&parres->shares, icls->shares.my_guarantee, 0);
parres->cnt_unused += icls->cnt_guarantee;
+
+	// walk through the parent's children to recompute the parent's cur_max_limit
+ ckrm_lock_hier(parres->core);
+ maxlimit = 0;
+ while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
+ childres = ckrm_get_res_class(child, resid, cki_icls_t);
+ if (maxlimit < childres->shares.my_limit) {
+ maxlimit = childres->shares.my_limit;
+ }
+ }
+ ckrm_unlock_hier(parres->core);
+ if (parres->shares.cur_max_limit < maxlimit) {
+ parres->shares.cur_max_limit = maxlimit;
+ }
spin_unlock(&parres->shares_lock);
kfree(res);
struct ckrm_shares *cur, *par;
int rc = -EINVAL, resid = cki_rcbs.resid;
- if (!icls) {
- printk(KERN_ERR "No class\n");
+ if (!icls)
return rc;
- }
cur = &icls->shares;
-
- /* limits not supported */
- if ((new->max_limit != CKRM_SHARE_UNCHANGED)
- || (new->my_limit != CKRM_SHARE_UNCHANGED)) {
- printk(KERN_ERR "limits not supported\n");
- return -EINVAL;
- }
-
if (icls->parent) {
parres =
ckrm_get_res_class(icls->parent, resid, cki_icls_t);
if (!parres) {
- printk(KERN_ERR "cki_setshare: error getting "
- "resclass from core \n");
+ pr_debug("cki_setshare: invalid resclass\n");
return -EINVAL;
}
spin_lock(&parres->shares_lock);
}
rc = set_shares(new, cur, par);
- printk(KERN_ERR "rc from set_shares %d\n", rc);
if ((!rc) && parres) {
-
if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
parres->cnt_unused = CKRM_SHARE_DONTCARE;
} else if (par->total_guarantee) {
parres->cnt_unused = 0;
}
cki_recalc_propagate(res, parres);
-
-#if 0
- int old = icls->ioprio;
-
- rc = cki_recalc(icls,0);
-
- if (!rc && parres) {
- int raise_tot = icls->ioprio - old ;
- parres->unused -= raise_tot ;
- }
-#endif
}
spin_unlock(&icls->shares_lock);
if (icls->parent) {
if (!icls)
return -EINVAL;
-/*
- seq_printf(sfile, "%d my_read\n",atomic_read(&icls->mystats.blkrd));
- seq_printf(sfile, "%d my_write\n",atomic_read(&icls->mystats.blkwr));
- seq_printf(sfile, "%d total_read\n",atomic_read(&icls->stats.blkrd));
- seq_printf(sfile, "%d total_write\n",atomic_read(&icls->stats.blkwr));
-*/
-
- seq_printf(sfile, "%d total ioprio\n",icls->cnt_guarantee);
- seq_printf(sfile, "%d unused/default ioprio\n",icls->cnt_unused);
+ seq_printf(sfile, "abs limit %d\n",icls->cnt_limit);
+	seq_printf(sfile, "skip %d timedout %d avsec %lu rate %d "
+		   "sec0 %lu sec1 %lu\n",
+ icls->cfqpriv.nskip,
+ icls->cfqpriv.timedout,
+ icls->cfqpriv.navsec,
+ atomic_read(&(icls->cfqpriv.sectorate)),
+ (unsigned long)icls->cfqpriv.sec[0],
+ (unsigned long)icls->cfqpriv.sec[1]);
return 0;
}
resid = ckrm_register_res_ctlr(clstype, &cki_rcbs);
if (resid != -1) {
cki_rcbs.classtype = clstype;
- cki_cfq_set(cki_tsk_icls,cki_tsk_ioprio);
+ cki_cfq_set(cki_tsk_icls,cki_tsk_ioprio,cki_tsk_cfqpriv);
}
}
ckrm_unregister_res_ctlr(&cki_rcbs);
cki_rcbs.resid = -1;
cki_rcbs.classtype = NULL;
- cki_cfq_set(NULL,NULL);
+ cki_cfq_set(NULL,NULL,NULL);
}
module_init(cki_init)
static icls_tsk_t tskiclstub;
static icls_ioprio_t tskiopriostub;
+static icls_tsk_t tskcfqprivstub;
-
-void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio)
+void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio, icls_tsk_t tskcfqpriv)
{
spin_lock(&stub_lock);
tskiclstub = tskicls;
tskiopriostub = tskioprio;
+ tskcfqprivstub = tskcfqpriv;
spin_unlock(&stub_lock);
}
return ret;
}
+void *cki_cfqpriv(struct task_struct *tsk)
+{
+ void *ret;
+ spin_lock(&stub_lock);
+	if (tskcfqprivstub)
+ ret = (*tskcfqprivstub)(tsk);
+ else
+ ret = NULL;
+ spin_unlock(&stub_lock);
+ return ret;
+}
+
EXPORT_SYMBOL(cki_cfq_set);
EXPORT_SYMBOL(cki_hash_key);
EXPORT_SYMBOL(cki_ioprio);
+EXPORT_SYMBOL(cki_cfqpriv);
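
The stub layer keeps the scheduler free of a hard dependency on the CKRM
module: cki_cfq_set() publishes the callbacks under stub_lock, and
cki_cfqpriv() calls through the registered pointer. A minimal sketch of a
consumer on the CFQ side (the helper below is hypothetical, not part of this
patch):

	/* Hypothetical helper: look up a task's per-class rate limiter
	 * through the stub.  Returns NULL until the CKRM controller
	 * has registered via cki_cfq_set(). */
	static inline atomic_t *tsk_sectorate(struct task_struct *tsk)
	{
		cfqlim_t *lim = cki_cfqpriv(tsk);

		return lim ? &lim->sectorate : NULL;
	}
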
--- /dev/null
+consolemap_deftbl.c
+defkeymap.c
--- /dev/null
+classlist.h
+devlist.h
+gen-devlist
--- /dev/null
+aic79xx_reg.h
+aic79xx_seq.h
+aic7xxx_reg.h
+aic7xxx_seq.h
return ioctx;
}
-static void use_mm(struct mm_struct *mm)
+void use_mm(struct mm_struct *mm)
{
struct mm_struct *active_mm;
--- /dev/null
+asm_offsets.h
#define APIC_LVT_REMOTE_IRR (1<<14)
#define APIC_INPUT_POLARITY (1<<13)
#define APIC_SEND_PENDING (1<<12)
+#define APIC_MODE_MASK 0x700
#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
#define APIC_MODE_FIXED 0x0
u32 stack[THREAD_SIZE/sizeof(u32)];
};
+#ifdef CONFIG_IRQSTACKS
extern union irq_ctx *hardirq_ctx[NR_CPUS];
extern union irq_ctx *softirq_ctx[NR_CPUS];
#define __ARCH_HAS_DO_SOFTIRQ
+#else
+#define irq_ctx_init(cpu) do { ; } while (0)
+#endif
+
struct irqaction;
struct pt_regs;
asmlinkage int handle_IRQ_event(unsigned int, struct pt_regs *,
--- /dev/null
+#ifndef _I386_KEXEC_H
+#define _I386_KEXEC_H
+
+#include <asm/fixmap.h>
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT is the maximum page get_free_page can
+ * return, i.e. the highest page that is mapped directly into kernel
+ * memory, for which kmap is not required.
+ *
+ * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct
+ * calculation for the amount of memory directly mappable into the
+ * kernel memory space.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE 4096
+
+#endif /* _I386_KEXEC_H */
#define MODULE_REGPARM ""
#endif
+#if (CONFIG_STACK_SIZE_SHIFT < 12)
+#define MODULE_STACKSIZE "TINYSTACKS "
+#elif (CONFIG_STACK_SIZE_SHIFT == 12)
#define MODULE_STACKSIZE "4KSTACKS "
+#elif (CONFIG_STACK_SIZE_SHIFT == 13)
+#define MODULE_STACKSIZE "8KSTACKS "
+#elif (CONFIG_STACK_SIZE_SHIFT == 14)
+#define MODULE_STACKSIZE "16KSTACKS "
+#elif (CONFIG_STACK_SIZE_SHIFT > 14)
+#define MODULE_STACKSIZE "HUGESTACKS "
+#else
+#define MODULE_STACKSIZE ""
+#endif
#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE
#define ARCH_MIN_TASKALIGN 16
-
-#define STACK_PAGE_COUNT (4096/PAGE_SIZE)
-
-
+#if ((1<<CONFIG_STACK_SIZE_SHIFT) < PAGE_SIZE)
+#error (1<<CONFIG_STACK_SIZE_SHIFT) must be at least PAGE_SIZE
+#endif
+#define STACK_PAGE_COUNT ((1<<CONFIG_STACK_SIZE_SHIFT)/PAGE_SIZE)
struct thread_struct {
*/
#define IDT_ENTRIES 256
+#define KERN_PHYS_OFFSET (CONFIG_KERN_PHYS_OFFSET * 0x100000)
+
#endif
#endif
#define PREEMPT_ACTIVE 0x4000000
-#define THREAD_SIZE (4096)
+#define THREAD_SIZE (1<<CONFIG_STACK_SIZE_SHIFT)
+#define STACK_WARN (CONFIG_STACK_WARN)
+#define STACK_PANIC (0x200ul)
-#define STACK_WARN (THREAD_SIZE/8)
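
With the stack size configurable, the derived constants follow directly from
the shift; for example (assuming 4 KB pages, and purely illustrative names):

	#define SHIFT_EX	13			/* hypothetical Kconfig value */
	#define THREAD_SIZE_EX	(1 << SHIFT_EX)		/* == 8192, an 8 KB stack  */
	#define STACK_PAGES_EX	(THREAD_SIZE_EX / 4096)	/* == 2 pages per stack    */
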
/*
* macros/functions for gaining access to the thread information structure
*
--- /dev/null
+autoconf.h
+compile.h
+version.h
extern void *cki_tsk_icls (struct task_struct *tsk);
extern int cki_tsk_ioprio (struct task_struct *tsk);
+extern void *cki_tsk_cfqpriv (struct task_struct *tsk);
#endif /* CONFIG_CKRM_RES_BLKIO */
asmlinkage int sys_ioprio_set(int ioprio);
asmlinkage int sys_ioprio_get(void);
+/* common structure for cfq & ckrm I/O controller */
+typedef struct cfqlim {
+ int nskip;
+ unsigned long navsec;
+ int timedout;
+ atomic_t sectorate;
+ u64 sec[2];
+} cfqlim_t ;
+
#endif /* __KERNEL__ */
#endif /* _LINUX_FS_H */
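
cfqlim_t is the handshake between the two halves of the controller: CKRM
writes the control input (sectorate) while CFQ feeds back statistics (nskip,
navsec, timedout) and the per-epoch sector counts in sec[0] and sec[1]. As a
sketch of the intended bookkeeping (the rollover helper below is
hypothetical):

	/* Hypothetical helper: retire the current epoch's sector count
	 * into sec[0] and restart the count for the new epoch. */
	static inline void cfqlim_roll_epoch(cfqlim_t *cl)
	{
		cl->sec[0] = cl->sec[1];	/* previous epoch's total */
		cl->sec[1] = 0;			/* current epoch restarts */
	}
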
--- /dev/null
+#ifndef LINUX_KEXEC_H
+#define LINUX_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+#include <linux/types.h>
+#include <linux/list.h>
+#include <asm/kexec.h>
+
+/*
+ * This structure is used to hold the arguments that are used when loading
+ * kernel binaries.
+ */
+
+typedef unsigned long kimage_entry_t;
+#define IND_DESTINATION 0x1
+#define IND_INDIRECTION 0x2
+#define IND_DONE 0x4
+#define IND_SOURCE 0x8
+
+#define KEXEC_SEGMENT_MAX 8
+struct kexec_segment {
+ void *buf;
+ size_t bufsz;
+ void *mem;
+ size_t memsz;
+};
+
+struct kimage {
+ kimage_entry_t head;
+ kimage_entry_t *entry;
+ kimage_entry_t *last_entry;
+
+ unsigned long destination;
+
+ unsigned long start;
+ struct page *control_code_page;
+
+ unsigned long nr_segments;
+ struct kexec_segment segment[KEXEC_SEGMENT_MAX];
+
+ struct list_head control_pages;
+ struct list_head dest_pages;
+ struct list_head unuseable_pages;
+};
+
+
+/* kexec interface functions */
+extern void machine_kexec(struct kimage *image);
+extern int machine_kexec_prepare(struct kimage *image);
+extern void machine_kexec_cleanup(struct kimage *image);
+extern asmlinkage long sys_kexec(unsigned long entry, long nr_segments,
+ struct kexec_segment *segments);
+extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
+extern struct kimage *kexec_image;
+#endif
+#endif /* LINUX_KEXEC_H */
+++ /dev/null
-/* PPTP constants and structs */
-#ifndef _CONNTRACK_PPTP_H
-#define _CONNTRACK_PPTP_H
-
-/* state of the control session */
-enum pptp_ctrlsess_state {
- PPTP_SESSION_NONE, /* no session present */
- PPTP_SESSION_ERROR, /* some session error */
- PPTP_SESSION_STOPREQ, /* stop_sess request seen */
- PPTP_SESSION_REQUESTED, /* start_sess request seen */
- PPTP_SESSION_CONFIRMED, /* session established */
-};
-
-/* state of the call inside the control session */
-enum pptp_ctrlcall_state {
- PPTP_CALL_NONE,
- PPTP_CALL_ERROR,
- PPTP_CALL_OUT_REQ,
- PPTP_CALL_OUT_CONF,
- PPTP_CALL_IN_REQ,
- PPTP_CALL_IN_REP,
- PPTP_CALL_IN_CONF,
- PPTP_CALL_CLEAR_REQ,
-};
-
-
-/* conntrack private data */
-struct ip_ct_pptp_master {
- enum pptp_ctrlsess_state sstate; /* session state */
-
- /* everything below is going to be per-expectation in newnat,
- * since there could be more than one call within one session */
- enum pptp_ctrlcall_state cstate; /* call state */
- u_int16_t pac_call_id; /* call id of PAC, host byte order */
- u_int16_t pns_call_id; /* call id of PNS, host byte order */
-};
-
-/* conntrack_expect private member */
-struct ip_ct_pptp_expect {
- enum pptp_ctrlcall_state cstate; /* call state */
- u_int16_t pac_call_id; /* call id of PAC */
- u_int16_t pns_call_id; /* call id of PNS */
-};
-
-
-#ifdef __KERNEL__
-
-#include <linux/netfilter_ipv4/lockhelp.h>
-DECLARE_LOCK_EXTERN(ip_pptp_lock);
-
-#define IP_CONNTR_PPTP PPTP_CONTROL_PORT
-
-#define PPTP_CONTROL_PORT 1723
-
-#define PPTP_PACKET_CONTROL 1
-#define PPTP_PACKET_MGMT 2
-
-#define PPTP_MAGIC_COOKIE 0x1a2b3c4d
-
-struct pptp_pkt_hdr {
- __u16 packetLength;
- __u16 packetType;
- __u32 magicCookie;
-};
-
-/* PptpControlMessageType values */
-#define PPTP_START_SESSION_REQUEST 1
-#define PPTP_START_SESSION_REPLY 2
-#define PPTP_STOP_SESSION_REQUEST 3
-#define PPTP_STOP_SESSION_REPLY 4
-#define PPTP_ECHO_REQUEST 5
-#define PPTP_ECHO_REPLY 6
-#define PPTP_OUT_CALL_REQUEST 7
-#define PPTP_OUT_CALL_REPLY 8
-#define PPTP_IN_CALL_REQUEST 9
-#define PPTP_IN_CALL_REPLY 10
-#define PPTP_IN_CALL_CONNECT 11
-#define PPTP_CALL_CLEAR_REQUEST 12
-#define PPTP_CALL_DISCONNECT_NOTIFY 13
-#define PPTP_WAN_ERROR_NOTIFY 14
-#define PPTP_SET_LINK_INFO 15
-
-#define PPTP_MSG_MAX 15
-
-/* PptpGeneralError values */
-#define PPTP_ERROR_CODE_NONE 0
-#define PPTP_NOT_CONNECTED 1
-#define PPTP_BAD_FORMAT 2
-#define PPTP_BAD_VALUE 3
-#define PPTP_NO_RESOURCE 4
-#define PPTP_BAD_CALLID 5
-#define PPTP_REMOVE_DEVICE_ERROR 6
-
-struct PptpControlHeader {
- __u16 messageType;
- __u16 reserved;
-};
-
-/* FramingCapability Bitmap Values */
-#define PPTP_FRAME_CAP_ASYNC 0x1
-#define PPTP_FRAME_CAP_SYNC 0x2
-
-/* BearerCapability Bitmap Values */
-#define PPTP_BEARER_CAP_ANALOG 0x1
-#define PPTP_BEARER_CAP_DIGITAL 0x2
-
-struct PptpStartSessionRequest {
- __u16 protocolVersion;
- __u8 reserved1;
- __u8 reserved2;
- __u32 framingCapability;
- __u32 bearerCapability;
- __u16 maxChannels;
- __u16 firmwareRevision;
- __u8 hostName[64];
- __u8 vendorString[64];
-};
-
-/* PptpStartSessionResultCode Values */
-#define PPTP_START_OK 1
-#define PPTP_START_GENERAL_ERROR 2
-#define PPTP_START_ALREADY_CONNECTED 3
-#define PPTP_START_NOT_AUTHORIZED 4
-#define PPTP_START_UNKNOWN_PROTOCOL 5
-
-struct PptpStartSessionReply {
- __u16 protocolVersion;
- __u8 resultCode;
- __u8 generalErrorCode;
- __u32 framingCapability;
- __u32 bearerCapability;
- __u16 maxChannels;
- __u16 firmwareRevision;
- __u8 hostName[64];
- __u8 vendorString[64];
-};
-
-/* PptpStopReasons */
-#define PPTP_STOP_NONE 1
-#define PPTP_STOP_PROTOCOL 2
-#define PPTP_STOP_LOCAL_SHUTDOWN 3
-
-struct PptpStopSessionRequest {
- __u8 reason;
-};
-
-/* PptpStopSessionResultCode */
-#define PPTP_STOP_OK 1
-#define PPTP_STOP_GENERAL_ERROR 2
-
-struct PptpStopSessionReply {
- __u8 resultCode;
- __u8 generalErrorCode;
-};
-
-struct PptpEchoRequest {
- __u32 identNumber;
-};
-
-/* PptpEchoReplyResultCode */
-#define PPTP_ECHO_OK 1
-#define PPTP_ECHO_GENERAL_ERROR 2
-
-struct PptpEchoReply {
- __u32 identNumber;
- __u8 resultCode;
- __u8 generalErrorCode;
- __u16 reserved;
-};
-
-/* PptpFramingType */
-#define PPTP_ASYNC_FRAMING 1
-#define PPTP_SYNC_FRAMING 2
-#define PPTP_DONT_CARE_FRAMING 3
-
-/* PptpCallBearerType */
-#define PPTP_ANALOG_TYPE 1
-#define PPTP_DIGITAL_TYPE 2
-#define PPTP_DONT_CARE_BEARER_TYPE 3
-
-struct PptpOutCallRequest {
- __u16 callID;
- __u16 callSerialNumber;
- __u32 minBPS;
- __u32 maxBPS;
- __u32 bearerType;
- __u32 framingType;
- __u16 packetWindow;
- __u16 packetProcDelay;
- __u16 reserved1;
- __u16 phoneNumberLength;
- __u16 reserved2;
- __u8 phoneNumber[64];
- __u8 subAddress[64];
-};
-
-/* PptpCallResultCode */
-#define PPTP_OUTCALL_CONNECT 1
-#define PPTP_OUTCALL_GENERAL_ERROR 2
-#define PPTP_OUTCALL_NO_CARRIER 3
-#define PPTP_OUTCALL_BUSY 4
-#define PPTP_OUTCALL_NO_DIAL_TONE 5
-#define PPTP_OUTCALL_TIMEOUT 6
-#define PPTP_OUTCALL_DONT_ACCEPT 7
-
-struct PptpOutCallReply {
- __u16 callID;
- __u16 peersCallID;
- __u8 resultCode;
- __u8 generalErrorCode;
- __u16 causeCode;
- __u32 connectSpeed;
- __u16 packetWindow;
- __u16 packetProcDelay;
- __u32 physChannelID;
-};
-
-struct PptpInCallRequest {
- __u16 callID;
- __u16 callSerialNumber;
- __u32 callBearerType;
- __u32 physChannelID;
- __u16 dialedNumberLength;
- __u16 dialingNumberLength;
- __u8 dialedNumber[64];
- __u8 dialingNumber[64];
- __u8 subAddress[64];
-};
-
-/* PptpInCallResultCode */
-#define PPTP_INCALL_ACCEPT 1
-#define PPTP_INCALL_GENERAL_ERROR 2
-#define PPTP_INCALL_DONT_ACCEPT 3
-
-struct PptpInCallReply {
- __u16 callID;
- __u16 peersCallID;
- __u8 resultCode;
- __u8 generalErrorCode;
- __u16 packetWindow;
- __u16 packetProcDelay;
- __u16 reserved;
-};
-
-struct PptpInCallConnected {
- __u16 peersCallID;
- __u16 reserved;
- __u32 connectSpeed;
- __u16 packetWindow;
- __u16 packetProcDelay;
- __u32 callFramingType;
-};
-
-struct PptpClearCallRequest {
- __u16 callID;
- __u16 reserved;
-};
-
-struct PptpCallDisconnectNotify {
- __u16 callID;
- __u8 resultCode;
- __u8 generalErrorCode;
- __u16 causeCode;
- __u16 reserved;
- __u8 callStatistics[128];
-};
-
-struct PptpWanErrorNotify {
- __u16 peersCallID;
- __u16 reserved;
- __u32 crcErrors;
- __u32 framingErrors;
- __u32 hardwareOverRuns;
- __u32 bufferOverRuns;
- __u32 timeoutErrors;
- __u32 alignmentErrors;
-};
-
-struct PptpSetLinkInfo {
- __u16 peersCallID;
- __u16 reserved;
- __u32 sendAccm;
- __u32 recvAccm;
-};
-
-
-struct pptp_priv_data {
- __u16 call_id;
- __u16 mcall_id;
- __u16 pcall_id;
-};
-
-union pptp_ctrl_union {
- struct PptpStartSessionRequest sreq;
- struct PptpStartSessionReply srep;
- struct PptpStopSessionRequest streq;
- struct PptpStopSessionReply strep;
- struct PptpOutCallRequest ocreq;
- struct PptpOutCallReply ocack;
- struct PptpInCallRequest icreq;
- struct PptpInCallReply icack;
- struct PptpInCallConnected iccon;
- struct PptpClearCallRequest clrreq;
- struct PptpCallDisconnectNotify disc;
- struct PptpWanErrorNotify wanerr;
- struct PptpSetLinkInfo setlink;
-};
-
-#endif /* __KERNEL__ */
-#endif /* _CONNTRACK_PPTP_H */
+++ /dev/null
-#ifndef _CONNTRACK_PROTO_GRE_H
-#define _CONNTRACK_PROTO_GRE_H
-#include <asm/byteorder.h>
-
-/* GRE PROTOCOL HEADER */
-
-/* GRE Version field */
-#define GRE_VERSION_1701 0x0
-#define GRE_VERSION_PPTP 0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP 0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C 0x80
-#define GRE_FLAG_R 0x40
-#define GRE_FLAG_K 0x20
-#define GRE_FLAG_S 0x10
-#define GRE_FLAG_A 0x80
-
-#define GRE_IS_C(f) ((f)&GRE_FLAG_C)
-#define GRE_IS_R(f) ((f)&GRE_FLAG_R)
-#define GRE_IS_K(f) ((f)&GRE_FLAG_K)
-#define GRE_IS_S(f) ((f)&GRE_FLAG_S)
-#define GRE_IS_A(f) ((f)&GRE_FLAG_A)
-
-/* GRE is a mess: Four different standards */
-struct gre_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u16 rec:3,
- srr:1,
- seq:1,
- key:1,
- routing:1,
- csum:1,
- version:3,
- reserved:4,
- ack:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- __u16 csum:1,
- routing:1,
- key:1,
- seq:1,
- srr:1,
- rec:3,
- ack:1,
- reserved:4,
- version:3;
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
- __u16 protocol;
-};
-
-/* modified GRE header for PPTP */
-struct gre_hdr_pptp {
- __u8 flags; /* bitfield */
- __u8 version; /* should be GRE_VERSION_PPTP */
- __u16 protocol; /* should be GRE_PROTOCOL_PPTP */
- __u16 payload_len; /* size of ppp payload, not inc. gre header */
- __u16 call_id; /* peer's call_id for this session */
- __u32 seq; /* sequence number. Present if S==1 */
- __u32 ack; /* seq number of highest packet recieved by */
- /* sender in this session */
-};
-
-
-/* this is part of ip_conntrack */
-struct ip_ct_gre {
- unsigned int stream_timeout;
- unsigned int timeout;
-};
-
-/* this is part of ip_conntrack_expect */
-struct ip_ct_gre_expect {
- struct ip_ct_gre_keymap *keymap_orig, *keymap_reply;
-};
-
-#ifdef __KERNEL__
-struct ip_conntrack_expect;
-
-/* structure for original <-> reply keymap */
-struct ip_ct_gre_keymap {
- struct list_head list;
-
- struct ip_conntrack_tuple tuple;
-};
-
-
-/* add new tuple->key_reply pair to keymap */
-int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp,
- struct ip_conntrack_tuple *t,
- int reply);
-
-/* change an existing keymap entry */
-void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km,
- struct ip_conntrack_tuple *t);
-
-/* delete keymap entries */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp);
-
-
-/* get pointer to gre key, if present */
-static inline u_int32_t *gre_key(struct gre_hdr *greh)
-{
- if (!greh->key)
- return NULL;
- if (greh->csum || greh->routing)
- return (u_int32_t *) (greh+sizeof(*greh)+4);
- return (u_int32_t *) (greh+sizeof(*greh));
-}
-
-/* get pointer ot gre csum, if present */
-static inline u_int16_t *gre_csum(struct gre_hdr *greh)
-{
- if (!greh->csum)
- return NULL;
- return (u_int16_t *) (greh+sizeof(*greh));
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _CONNTRACK_PROTO_GRE_H */
+++ /dev/null
-/* PPTP constants and structs */
-#ifndef _NAT_PPTP_H
-#define _NAT_PPTP_H
-
-/* conntrack private data */
-struct ip_nat_pptp {
- u_int16_t pns_call_id; /* NAT'ed PNS call id */
- u_int16_t pac_call_id; /* NAT'ed PAC call id */
-};
-
-#endif /* _NAT_PPTP_H */
extern void machine_halt(void);
extern void machine_power_off(void);
+extern void machine_shutdown(void);
+
#endif
#endif /* _LINUX_REBOOT_H */
--- /dev/null
+config_data.gz
+config_data.h
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_IKCONFIG_PROC) += configs.o
obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o
obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o
obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
- obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_laq.o
+ obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o
obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o ckrm_cpu_monitor.o
obj-$(CONFIG_CKRM_RES_MEM) += ckrm_mem.o
+++ /dev/null
-/* ckrm_socketaq.c - accept queue resource controller
- *
- * Copyright (C) Vivek Kashyap, IBM Corp. 2004
- *
- * Latest version, more details at http://ckrm.sf.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-/* Changes
- * Initial version
- */
-
-/* Code Description: TBD
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/errno.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/ckrm.h>
-#include <linux/ckrm_rc.h>
-#include <net/tcp.h>
-
-#include <linux/ckrm_net.h>
-
-#define hnode_2_core(ptr) \
- ((ptr) ? container_of(ptr, struct ckrm_core_class, hnode) : NULL)
-
-#define CKRM_SAQ_MAX_DEPTH 3 // 0 => /rcfs
- // 1 => socket_aq
- // 2 => socket_aq/listen_class
- // 3 => socket_aq/listen_class/accept_queues
- // 4 => Not allowed
-
-typedef struct ckrm_laq_res {
- spinlock_t reslock;
- atomic_t refcnt;
- struct ckrm_shares shares;
- struct ckrm_core_class *core;
- struct ckrm_core_class *pcore;
- int my_depth;
- int my_id;
- unsigned int min_ratio;
-} ckrm_laq_res_t;
-
-static int my_resid = -1;
-
-extern struct ckrm_core_class *rcfs_create_under_netroot(char *, int, int);
-extern struct ckrm_core_class *rcfs_make_core(struct dentry *,
- struct ckrm_core_class *);
-
-void laq_res_hold(struct ckrm_laq_res *res)
-{
- atomic_inc(&res->refcnt);
- return;
-}
-
-void laq_res_put(struct ckrm_laq_res *res)
-{
- if (atomic_dec_and_test(&res->refcnt))
- kfree(res);
- return;
-}
-
-/* Initialize rescls values
- */
-static void laq_res_initcls(void *my_res)
-{
- ckrm_laq_res_t *res = my_res;
-
- res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
- res->shares.my_limit = CKRM_SHARE_DONTCARE;
- res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
- res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- res->shares.cur_max_limit = 0;
-}
-
-static int atoi(char *s)
-{
- int k = 0;
- while (*s)
- k = *s++ - '0' + (k * 10);
- return k;
-}
-
-static char *laq_get_name(struct ckrm_core_class *c)
-{
- char *p = (char *)c->name;
-
- while (*p)
- p++;
- while (*p != '/' && p != c->name)
- p--;
-
- return ++p;
-}
-
-static void *laq_res_alloc(struct ckrm_core_class *core,
- struct ckrm_core_class *parent)
-{
- ckrm_laq_res_t *res, *pres;
- int pdepth;
-
- if (parent)
- pres = ckrm_get_res_class(parent, my_resid, ckrm_laq_res_t);
- else
- pres = NULL;
-
- if (core == core->classtype->default_class)
- pdepth = 1;
- else {
- if (!parent)
- return NULL;
- pdepth = 1 + pres->my_depth;
- }
-
- res = kmalloc(sizeof(ckrm_laq_res_t), GFP_ATOMIC);
- if (res) {
- memset(res, 0, sizeof(res));
- spin_lock_init(&res->reslock);
- laq_res_hold(res);
- res->my_depth = pdepth;
- if (pdepth == 2) // listen class
- res->my_id = 0;
- else if (pdepth == 3)
- res->my_id = atoi(laq_get_name(core));
- res->core = core;
- res->pcore = parent;
-
- // rescls in place, now initialize contents other than
- // hierarchy pointers
- laq_res_initcls(res); // acts as initialising value
- }
-
- return res;
-}
-
-static void laq_res_free(void *my_res)
-{
- ckrm_laq_res_t *res = (ckrm_laq_res_t *) my_res;
- ckrm_laq_res_t *parent;
-
- if (!res)
- return;
-
- if (res->my_depth != 3) {
- kfree(res);
- return;
- }
-
- parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
- if (!parent) // Should never happen
- return;
-
- spin_lock(&parent->reslock);
- spin_lock(&res->reslock);
-
- // return child's guarantee to parent node
- // Limits have no meaning for accept queue control
- child_guarantee_changed(&parent->shares, res->shares.my_guarantee, 0);
-
- spin_unlock(&res->reslock);
- laq_res_put(res);
- spin_unlock(&parent->reslock);
- return;
-}
-
-/**************************************************************************
- * SHARES ***
- **************************************************************************/
-
-void laq_set_aq_value(struct ckrm_net_struct *ns, unsigned int *aq_ratio)
-{
- int i;
- struct tcp_opt *tp;
-
- tp = tcp_sk(ns->ns_sk);
- for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
- tp->acceptq[i].aq_ratio = aq_ratio[i];
- return;
-}
-void laq_set_aq_values(ckrm_laq_res_t * parent, unsigned int *aq_ratio)
-{
-
- struct ckrm_net_struct *ns;
- struct ckrm_core_class *core = parent->core;
-
- class_lock(core);
- list_for_each_entry(ns, &core->objlist, ckrm_link) {
- laq_set_aq_value(ns, aq_ratio);
- }
- class_unlock(core);
- return;
-}
-
-static void calculate_aq_ratios(ckrm_laq_res_t * res, unsigned int *aq_ratio)
-{
- struct ckrm_hnode *chnode;
- ckrm_laq_res_t *child;
- unsigned int min;
- int i;
-
- min = aq_ratio[0] = (unsigned int)res->shares.unused_guarantee;
-
- list_for_each_entry(chnode, &res->core->hnode.children, siblings) {
- child = hnode_2_core(chnode)->res_class[my_resid];
-
- aq_ratio[child->my_id] =
- (unsigned int)child->shares.my_guarantee;
- if (aq_ratio[child->my_id] == CKRM_SHARE_DONTCARE)
- aq_ratio[child->my_id] = 0;
- if (aq_ratio[child->my_id] &&
- ((unsigned int)aq_ratio[child->my_id] < min))
- min = (unsigned int)child->shares.my_guarantee;
- }
-
- if (min == 0) {
- min = 1;
- // default takes all if nothing specified
- aq_ratio[0] = 1;
- }
- res->min_ratio = min;
-
- for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
- aq_ratio[i] = aq_ratio[i] / min;
-}
-
-static int laq_set_share_values(void *my_res, struct ckrm_shares *shares)
-{
- ckrm_laq_res_t *res = my_res;
- ckrm_laq_res_t *parent;
- unsigned int aq_ratio[NUM_ACCEPT_QUEUES];
- int rc = 0;
-
- if (!res)
- return -EINVAL;
-
- if (!res->pcore) {
- // something is badly wrong
- printk(KERN_ERR "socketaq internal inconsistency\n");
- return -EBADF;
- }
-
- parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
- if (!parent) // socketclass does not have a share interface
- return -EINVAL;
-
- // Ensure that we ignore limit values
- shares->my_limit = CKRM_SHARE_DONTCARE;
- shares->max_limit = CKRM_SHARE_UNCHANGED;
-
- if (res->my_depth == 0) {
- printk(KERN_ERR "socketaq bad entry\n");
- return -EBADF;
- } else if (res->my_depth == 1) {
- // can't be written to. This is an internal default.
- return -EINVAL;
- } else if (res->my_depth == 2) {
- //nothin to inherit
- if (!shares->total_guarantee) {
- return -EINVAL;
- }
- parent = res;
- shares->my_guarantee = CKRM_SHARE_DONTCARE;
- } else if (res->my_depth == 3) {
- // accept queue itself.
- shares->total_guarantee = CKRM_SHARE_UNCHANGED;
- }
-
- ckrm_lock_hier(parent->pcore);
- spin_lock(&parent->reslock);
- rc = set_shares(shares, &res->shares,
- (parent == res) ? NULL : &parent->shares);
- if (rc) {
- spin_unlock(&res->reslock);
- ckrm_unlock_hier(res->pcore);
- return rc;
- }
- calculate_aq_ratios(parent, aq_ratio);
- laq_set_aq_values(parent, aq_ratio);
- spin_unlock(&parent->reslock);
- ckrm_unlock_hier(parent->pcore);
-
- return rc;
-}
-
-static int laq_get_share_values(void *my_res, struct ckrm_shares *shares)
-{
- ckrm_laq_res_t *res = my_res;
-
- if (!res)
- return -EINVAL;
- *shares = res->shares;
- return 0;
-}
-
-/**************************************************************************
- * STATS ***
- **************************************************************************/
-
-void
-laq_print_aq_stats(struct seq_file *sfile, struct tcp_acceptq_info *taq, int i)
-{
- seq_printf(sfile, "Class %d connections:\n\taccepted: %u\n\t"
- "queued: %u\n\twait_time: %u\n",
- i, taq->acceptq_count, taq->acceptq_qcount,
- jiffies_to_msecs(taq->acceptq_wait_time));
-
- if (i)
- return;
-
- for (i = 1; i < NUM_ACCEPT_QUEUES; i++) {
- taq[0].acceptq_wait_time += taq[i].acceptq_wait_time;
- taq[0].acceptq_qcount += taq[i].acceptq_qcount;
- taq[0].acceptq_count += taq[i].acceptq_count;
- }
-
- seq_printf(sfile, "Totals :\n\taccepted: %u\n\t"
- "queued: %u\n\twait_time: %u\n",
- taq->acceptq_count, taq->acceptq_qcount,
- jiffies_to_msecs(taq->acceptq_wait_time));
-
- return;
-}
-
-void
-laq_get_aq_stats(ckrm_laq_res_t * pres, ckrm_laq_res_t * mres,
- struct tcp_acceptq_info *taq)
-{
- struct ckrm_net_struct *ns;
- struct ckrm_core_class *core = pres->core;
- struct tcp_opt *tp;
- int a = mres->my_id;
- int z;
-
- if (a == 0)
- z = NUM_ACCEPT_QUEUES;
- else
- z = a + 1;
-
- // XXX Instead of holding a class_lock introduce a rw
- // lock to be write locked by listen callbacks and read locked here.
- // - VK
- class_lock(pres->core);
- list_for_each_entry(ns, &core->objlist, ckrm_link) {
- tp = tcp_sk(ns->ns_sk);
- for (; a < z; a++) {
- taq->acceptq_wait_time += tp->acceptq[a].aq_wait_time;
- taq->acceptq_qcount += tp->acceptq[a].aq_qcount;
- taq->acceptq_count += tp->acceptq[a].aq_count;
- taq++;
- }
- }
- class_unlock(pres->core);
-}
-
-static int laq_get_stats(void *my_res, struct seq_file *sfile)
-{
- ckrm_laq_res_t *res = my_res;
- ckrm_laq_res_t *parent;
- struct tcp_acceptq_info taq[NUM_ACCEPT_QUEUES];
- int rc = 0;
-
- if (!res)
- return -EINVAL;
-
- if (!res->pcore) {
- // something is badly wrong
- printk(KERN_ERR "socketaq internal inconsistency\n");
- return -EBADF;
- }
-
- parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
- if (!parent) { // socketclass does not have a stat interface
- printk(KERN_ERR "socketaq internal fs inconsistency\n");
- return -EINVAL;
- }
-
- memset(taq, 0, sizeof(struct tcp_acceptq_info) * NUM_ACCEPT_QUEUES);
-
- switch (res->my_depth) {
-
- default:
- case 0:
- printk(KERN_ERR "socket class bad entry\n");
- rc = -EBADF;
- break;
-
- case 1: // can't be read from. this is internal default.
- // return -EINVAL
- rc = -EINVAL;
- break;
-
- case 2: // return the default and total
- ckrm_lock_hier(res->core); // block any deletes
- laq_get_aq_stats(res, res, &taq[0]);
- laq_print_aq_stats(sfile, &taq[0], 0);
- ckrm_unlock_hier(res->core); // block any deletes
- break;
-
- case 3:
- ckrm_lock_hier(parent->core); // block any deletes
- laq_get_aq_stats(parent, res, &taq[res->my_id]);
- laq_print_aq_stats(sfile, &taq[res->my_id], res->my_id);
- ckrm_unlock_hier(parent->core); // block any deletes
- break;
- }
-
- return rc;
-}
-
-/*
- * The network connection is reclassified to this class. Update its shares.
- * The socket lock is held.
- */
-static void laq_change_resclass(void *n, void *old, void *r)
-{
- struct ckrm_net_struct *ns = (struct ckrm_net_struct *)n;
- struct ckrm_laq_res *res = (struct ckrm_laq_res *)r;
- unsigned int aq_ratio[NUM_ACCEPT_QUEUES];
-
- if (res->my_depth != 2)
- return;
-
- // a change to my_depth == 3 ie. the accept classes cannot happen.
- // there is no target file
- if (res->my_depth == 2) { // it is one of the socket classes
- ckrm_lock_hier(res->pcore);
- // share rule: hold parent resource lock. then self.
- // However, since my_depth == 1 is a generic class it is not
- // needed here. Self lock is enough.
- spin_lock(&res->reslock);
- calculate_aq_ratios(res, aq_ratio);
- class_lock(res->pcore);
- laq_set_aq_value(ns, aq_ratio);
- class_unlock(res->pcore);
- spin_unlock(&res->reslock);
- ckrm_unlock_hier(res->pcore);
- }
-
- return;
-}
-
-struct ckrm_res_ctlr laq_rcbs = {
- .res_name = "laq",
- .resid = -1, // dynamically assigned
- .res_alloc = laq_res_alloc,
- .res_free = laq_res_free,
- .set_share_values = laq_set_share_values,
- .get_share_values = laq_get_share_values,
- .get_stats = laq_get_stats,
- .change_resclass = laq_change_resclass,
- //.res_initcls = laq_res_initcls, //HUBERTUS: unnecessary !!
-};
-
-int __init init_ckrm_laq_res(void)
-{
- struct ckrm_classtype *clstype;
- int resid;
-
- clstype = ckrm_find_classtype_by_name("socketclass");
- if (clstype == NULL) {
- printk(KERN_INFO " Unknown ckrm classtype<socketclass>");
- return -ENOENT;
- }
-
- if (my_resid == -1) {
- resid = ckrm_register_res_ctlr(clstype, &laq_rcbs);
- if (resid >= 0)
- my_resid = resid;
- printk(KERN_DEBUG "........init_ckrm_listen_aq_res -> %d\n", my_resid);
- }
- return 0;
-
-}
-
-void __exit exit_ckrm_laq_res(void)
-{
- ckrm_unregister_res_ctlr(&laq_rcbs);
- my_resid = -1;
-}
-
-module_init(init_ckrm_laq_res)
- module_exit(exit_ckrm_laq_res)
-
- MODULE_LICENSE("GPL");
-/* ckrm_socketaq.c - accept queue resource controller
+/* ckrm_listenaq.c - accept queue resource controller
*
* Copyright (C) Vivek Kashyap, IBM Corp. 2004
*
}
parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
- if (!parent) // socket_class does not have a share interface
+ if (!parent) // socketclass does not have a share interface
return -EINVAL;
// Ensure that we ignore limit values
}
parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
- if (!parent) { // socket_class does not have a stat interface
+ if (!parent) { // socketclass does not have a stat interface
printk(KERN_ERR "socketaq internal fs inconsistency\n");
return -EINVAL;
}
}
struct ckrm_res_ctlr laq_rcbs = {
- .res_name = "laq",
+ .res_name = "listenaq",
.resid = -1, // dynamically assigned
.res_alloc = laq_res_alloc,
.res_free = laq_res_free,
struct ckrm_classtype *clstype;
int resid;
- clstype = ckrm_find_classtype_by_name("socket_class");
+ clstype = ckrm_find_classtype_by_name("socketclass");
if (clstype == NULL) {
- printk(KERN_INFO " Unknown ckrm classtype<socket_class>");
+ printk(KERN_INFO " Unknown ckrm classtype<socketclass>");
return -ENOENT;
}
struct task_struct * p = (struct task_struct *) __data;
unsigned long interval;
- if (send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p))
- printk("*warning*: failed to send SIGALRM to %u\n", p->pid);
-
+ send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p);
interval = p->it_real_incr;
if (interval) {
if (interval > (unsigned long) LONG_MAX)
--- /dev/null
+/*
+ * kexec.c - kexec system call
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/kexec.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <net/checksum.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+/*
+ * When kexec transitions to the new kernel there is a one-to-one
+ * mapping between physical and virtual addresses. On processors
+ * where you can disable the MMU this is trivial and easy.  For
+ * others it is still a simple predictable page table to setup.
+ *
+ * In that environment kexec copies the new kernel to its final
+ * resting place. This means I can only support memory whose
+ * physical address can fit in an unsigned long. In particular
+ * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
+ * If the assembly stub has more restrictive requirements
+ * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
+ * defined more restrictively in <asm/kexec.h>.
+ *
+ * The code for the transition from the current kernel to the
+ * new kernel is placed in the control_code_buffer, whose size
+ * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
+ * page of memory is necessary, but some architectures require more.
+ * Because this memory must be identity mapped in the transition from
+ * virtual to physical addresses it must live in the range
+ * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
+ * modifiable.
+ *
+ * The assembly stub in the control code buffer is passed a linked list
+ * of descriptor pages detailing the source pages of the new kernel,
+ * and the destination addresses of those source pages. As this data
+ * structure is not used in the context of the current OS, it must
+ * be self-contained.
+ *
+ * The code has been made to work with highmem pages and will use a
+ * destination page in its final resting place (if it happens
+ * to allocate it). The end product of this is that most of the
+ * physical address space and most of RAM can be used.
+ *
+ * Future directions include:
+ * - allocating a page table with the control code buffer identity
+ * mapped, to simplify machine_kexec and make kexec_on_panic more
+ * reliable.
+ */
+
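The descriptor list described above is the flat array of kimage_entry_t
values built by kimage_add_entry() further down: each unsigned long holds a
page address in its high bits and one IND_* flag (from <linux/kexec.h>) in
its low bits. A simplified walker, mirroring what the relocation stub does
with the list (the copy itself is elided; this is a sketch, not the real
stub):

	static void walk_kimage_sketch(kimage_entry_t *ptr)
	{
		unsigned long dest = 0;
		kimage_entry_t entry;

		while (!((entry = *ptr) & IND_DONE)) {
			if (entry & IND_DESTINATION) {
				dest = entry & PAGE_MASK;	/* set the copy cursor */
				ptr++;
			} else if (entry & IND_INDIRECTION) {
				/* chain to the next page of entries */
				ptr = phys_to_virt(entry & PAGE_MASK);
			} else if (entry & IND_SOURCE) {
				/* the stub would copy this source page to dest */
				dest += PAGE_SIZE;
				ptr++;
			} else {
				ptr++;	/* defensive: skip unknown entries */
			}
		}
	}
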
+/*
+ * KIMAGE_NO_DEST is an impossible destination address, used for
+ * allocating pages whose destination address we do not care about.
+ */
+#define KIMAGE_NO_DEST (-1UL)
+
+static int kimage_is_destination_range(
+ struct kimage *image, unsigned long start, unsigned long end);
+static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest);
+
+
+static int kimage_alloc(struct kimage **rimage,
+ unsigned long nr_segments, struct kexec_segment *segments)
+{
+ int result;
+ struct kimage *image;
+ size_t segment_bytes;
+ unsigned long i;
+
+ /* Allocate a controlling structure */
+ result = -ENOMEM;
+ image = kmalloc(sizeof(*image), GFP_KERNEL);
+ if (!image) {
+ goto out;
+ }
+ memset(image, 0, sizeof(*image));
+ image->head = 0;
+ image->entry = &image->head;
+ image->last_entry = &image->head;
+
+ /* Initialize the list of control pages */
+ INIT_LIST_HEAD(&image->control_pages);
+
+ /* Initialize the list of destination pages */
+ INIT_LIST_HEAD(&image->dest_pages);
+
+ /* Initialize the list of unuseable pages */
+ INIT_LIST_HEAD(&image->unuseable_pages);
+
+ /* Read in the segments */
+ image->nr_segments = nr_segments;
+	segment_bytes = nr_segments * sizeof(*segments);
+ result = copy_from_user(image->segment, segments, segment_bytes);
+ if (result)
+ goto out;
+
+ /*
+ * Verify we have good destination addresses. The caller is
+ * responsible for making certain we don't attempt to load
+ * the new image into invalid or reserved areas of RAM. This
+ * just verifies it is an address we can use.
+ */
+ result = -EADDRNOTAVAIL;
+ for (i = 0; i < nr_segments; i++) {
+ unsigned long mend;
+ mend = ((unsigned long)(image->segment[i].mem)) +
+ image->segment[i].memsz;
+ if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
+ goto out;
+ }
+
+ /*
+	 * Find a location for the control code buffer, and add it to
+	 * the vector of segments so that its pages will also be
+ * counted as destination pages.
+ */
+ result = -ENOMEM;
+ image->control_code_page = kimage_alloc_control_pages(image,
+ get_order(KEXEC_CONTROL_CODE_SIZE));
+ if (!image->control_code_page) {
+ printk(KERN_ERR "Could not allocate control_code_buffer\n");
+ goto out;
+ }
+
+ result = 0;
+ out:
+ if (result == 0) {
+ *rimage = image;
+ } else {
+ kfree(image);
+ }
+ return result;
+}
+
+static int kimage_is_destination_range(
+ struct kimage *image, unsigned long start, unsigned long end)
+{
+ unsigned long i;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ unsigned long mstart, mend;
+ mstart = (unsigned long)image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz;
+ if ((end > mstart) && (start < mend)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static struct page *kimage_alloc_pages(unsigned int gfp_mask, unsigned int order)
+{
+ struct page *pages;
+ pages = alloc_pages(gfp_mask, order);
+ if (pages) {
+ unsigned int count, i;
+ pages->mapping = NULL;
+ pages->private = order;
+ count = 1 << order;
+ for(i = 0; i < count; i++) {
+ SetPageReserved(pages + i);
+ }
+ }
+ return pages;
+}
+
+static void kimage_free_pages(struct page *page)
+{
+ unsigned int order, count, i;
+ order = page->private;
+ count = 1 << order;
+ for(i = 0; i < count; i++) {
+ ClearPageReserved(page + i);
+ }
+ __free_pages(page, order);
+}
+
+static void kimage_free_page_list(struct list_head *list)
+{
+ struct list_head *pos, *next;
+ list_for_each_safe(pos, next, list) {
+ struct page *page;
+
+ page = list_entry(pos, struct page, lru);
+ list_del(&page->lru);
+
+ kimage_free_pages(page);
+ }
+}
+
+struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order)
+{
+ /* Control pages are special, they are the intermediaries
+ * that are needed while we copy the rest of the pages
+ * to their final resting place. As such they must
+ * not conflict with either the destination addresses
+ * or memory the kernel is already using.
+ *
+ * The only case where we really need more than one of
+	 * these is for architectures where we cannot disable
+ * the MMU and must instead generate an identity mapped
+ * page table for all of the memory.
+ *
+ * At worst this runs in O(N) of the image size.
+ */
+ struct list_head extra_pages;
+ struct page *pages;
+ unsigned int count;
+
+ count = 1 << order;
+ INIT_LIST_HEAD(&extra_pages);
+
+ /* Loop while I can allocate a page and the page allocated
+ * is a destination page.
+ */
+ do {
+ unsigned long pfn, epfn, addr, eaddr;
+ pages = kimage_alloc_pages(GFP_KERNEL, order);
+ if (!pages)
+ break;
+ pfn = page_to_pfn(pages);
+ epfn = pfn + count;
+ addr = pfn << PAGE_SHIFT;
+ eaddr = epfn << PAGE_SHIFT;
+		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
+		    kimage_is_destination_range(image, addr, eaddr)) {
+ list_add(&pages->lru, &extra_pages);
+ pages = NULL;
+ }
+ } while(!pages);
+ if (pages) {
+ /* Remember the allocated page... */
+ list_add(&pages->lru, &image->control_pages);
+
+		/* Because the page is already in its destination
+ * location we will never allocate another page at
+ * that address. Therefore kimage_alloc_pages
+ * will not return it (again) and we don't need
+ * to give it an entry in image->segment[].
+ */
+ }
+ /* Deal with the destination pages I have inadvertently allocated.
+ *
+ * Ideally I would convert multi-page allocations into single
+	 * page allocations, and add everything to image->dest_pages.
+ *
+ * For now it is simpler to just free the pages.
+ */
+ kimage_free_page_list(&extra_pages);
+ return pages;
+
+}
+
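+/* Append one entry to the image's destination/source list.  When the
+ * current indirection page fills up, allocate a fresh page, link it in
+ * with an IND_INDIRECTION entry, and continue there; the list is kept
+ * zero-terminated between calls.
+ */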
+static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
+{
+ if (*image->entry != 0) {
+ image->entry++;
+ }
+ if (image->entry == image->last_entry) {
+ kimage_entry_t *ind_page;
+ struct page *page;
+ page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
+ if (!page) {
+ return -ENOMEM;
+ }
+ ind_page = page_address(page);
+ *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
+ image->entry = ind_page;
+ image->last_entry =
+ ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
+ }
+ *image->entry = entry;
+ image->entry++;
+ *image->entry = 0;
+ return 0;
+}
+
+static int kimage_set_destination(
+ struct kimage *image, unsigned long destination)
+{
+ int result;
+
+ destination &= PAGE_MASK;
+ result = kimage_add_entry(image, destination | IND_DESTINATION);
+ if (result == 0) {
+ image->destination = destination;
+ }
+ return result;
+}
+
+
+static int kimage_add_page(struct kimage *image, unsigned long page)
+{
+ int result;
+
+ page &= PAGE_MASK;
+ result = kimage_add_entry(image, page | IND_SOURCE);
+ if (result == 0) {
+ image->destination += PAGE_SIZE;
+ }
+ return result;
+}
+
+
+static void kimage_free_extra_pages(struct kimage *image)
+{
+ /* Walk through and free any extra destination pages I may have */
+ kimage_free_page_list(&image->dest_pages);
+
+ /* Walk through and free any unuseable pages I have cached */
+ kimage_free_page_list(&image->unuseable_pages);
+
+}
+static int kimage_terminate(struct kimage *image)
+{
+ int result;
+
+ result = kimage_add_entry(image, IND_DONE);
+ if (result == 0) {
+ /* Point at the terminating element */
+ image->entry--;
+ kimage_free_extra_pages(image);
+ }
+ return result;
+}
+
+#define for_each_kimage_entry(image, ptr, entry) \
+ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
+ ptr = (entry & IND_INDIRECTION)? \
+ phys_to_virt((entry & PAGE_MASK)): ptr +1)
+
+static void kimage_free_entry(kimage_entry_t entry)
+{
+ struct page *page;
+
+ page = pfn_to_page(entry >> PAGE_SHIFT);
+ kimage_free_pages(page);
+}
+
+static void kimage_free(struct kimage *image)
+{
+ kimage_entry_t *ptr, entry;
+ kimage_entry_t ind = 0;
+
+ if (!image)
+ return;
+ kimage_free_extra_pages(image);
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_INDIRECTION) {
+ /* Free the previous indirection page */
+ if (ind & IND_INDIRECTION) {
+ kimage_free_entry(ind);
+ }
+ /* Save this indirection page until we are
+ * done with it.
+ */
+ ind = entry;
+ }
+ else if (entry & IND_SOURCE) {
+ kimage_free_entry(entry);
+ }
+ }
+ /* Free the final indirection page */
+ if (ind & IND_INDIRECTION) {
+ kimage_free_entry(ind);
+ }
+
+ /* Handle any machine specific cleanup */
+ machine_kexec_cleanup(image);
+
+ /* Free the kexec control pages... */
+ kimage_free_page_list(&image->control_pages);
+ kfree(image);
+}
+
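+/* Return the entry slot whose source page is assigned destination
+ * 'page', or 0 if no source page claims that destination yet; each
+ * IND_SOURCE entry's destination is recovered by replaying the
+ * IND_DESTINATION entries while walking the list.
+ */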
+static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page)
+{
+ kimage_entry_t *ptr, entry;
+ unsigned long destination = 0;
+
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_DESTINATION) {
+ destination = entry & PAGE_MASK;
+ }
+ else if (entry & IND_SOURCE) {
+ if (page == destination) {
+ return ptr;
+ }
+ destination += PAGE_SIZE;
+ }
+ }
+ return 0;
+}
+
+static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination)
+{
+ /*
+ * Here we implement safeguards to ensure that a source page
+ * is not copied to its destination page before the data on
+ * the destination page is no longer useful.
+ *
+ * To do this we maintain the invariant that a source page is
+ * either its own destination page, or it is not a
+ * destination page at all.
+ *
+	 * That is slightly stronger than required, but it makes the
+	 * proof that no problems will occur trivial, and the
+	 * implementation simple to verify.
+ *
+ * When allocating all pages normally this algorithm will run
+ * in O(N) time, but in the worst case it will run in O(N^2)
+ * time. If the runtime is a problem the data structures can
+ * be fixed.
+ */
+ struct page *page;
+ unsigned long addr;
+
+ /*
+ * Walk through the list of destination pages, and see if I
+ * have a match.
+ */
+ list_for_each_entry(page, &image->dest_pages, lru) {
+ addr = page_to_pfn(page) << PAGE_SHIFT;
+ if (addr == destination) {
+ list_del(&page->lru);
+ return page;
+ }
+ }
+ page = NULL;
+ while (1) {
+ kimage_entry_t *old;
+
+		/* Allocate a page; if we run out of memory, give up */
+ page = kimage_alloc_pages(gfp_mask, 0);
+ if (!page) {
+ return 0;
+ }
+		/* If the page cannot be used, file it away */
+ if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
+ list_add(&page->lru, &image->unuseable_pages);
+ continue;
+ }
+ addr = page_to_pfn(page) << PAGE_SHIFT;
+
+		/* If it is the destination page we want, use it */
+ if (addr == destination)
+ break;
+
+		/* If the page is not a destination page, use it */
+ if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE))
+ break;
+
+ /*
+		 * I know that the page is someone's destination page.
+		 * See if there is already a source page for this
+		 * destination page, and if so, swap the source pages.
+ */
+ old = kimage_dst_used(image, addr);
+ if (old) {
+ /* If so move it */
+ unsigned long old_addr;
+ struct page *old_page;
+
+ old_addr = *old & PAGE_MASK;
+ old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
+ copy_highpage(page, old_page);
+ *old = addr | (*old & ~PAGE_MASK);
+
+ /* The old page I have found cannot be a
+ * destination page, so return it.
+ */
+ addr = old_addr;
+ page = old_page;
+ break;
+ }
+ else {
+			/* Place the page on the destination list; I
+			 * will use it later.
+ */
+ list_add(&page->lru, &image->dest_pages);
+ }
+ }
+ return page;
+}
+
+static int kimage_load_segment(struct kimage *image,
+ struct kexec_segment *segment)
+{
+ unsigned long mstart;
+ int result;
+ unsigned long offset;
+ unsigned long offset_end;
+ unsigned char *buf;
+
+ result = 0;
+ buf = segment->buf;
+ mstart = (unsigned long)segment->mem;
+
+ offset_end = segment->memsz;
+
+ result = kimage_set_destination(image, mstart);
+ if (result < 0) {
+ goto out;
+ }
+ for (offset = 0; offset < segment->memsz; offset += PAGE_SIZE) {
+ struct page *page;
+ char *ptr;
+ size_t size, leader;
+ page = kimage_alloc_page(image, GFP_HIGHUSER, mstart + offset);
+		if (!page) {
+ result = -ENOMEM;
+ goto out;
+ }
+ result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT);
+ if (result < 0) {
+ goto out;
+ }
+ ptr = kmap(page);
+ if (segment->bufsz < offset) {
+			/* We are past the end; zero the whole page */
+ memset(ptr, 0, PAGE_SIZE);
+ kunmap(page);
+ continue;
+ }
+ size = PAGE_SIZE;
+ leader = 0;
+		if (offset == 0) {
+ leader = mstart & ~PAGE_MASK;
+ }
+ if (leader) {
+			/* We are on the first page; zero the unused portion */
+ memset(ptr, 0, leader);
+ size -= leader;
+ ptr += leader;
+ }
+ if (size > (segment->bufsz - offset)) {
+ size = segment->bufsz - offset;
+ }
+ if (size < (PAGE_SIZE - leader)) {
+ /* zero the trailing part of the page */
+ memset(ptr + size, 0, (PAGE_SIZE - leader) - size);
+ }
+ result = copy_from_user(ptr, buf + offset, size);
+ kunmap(page);
+ if (result) {
+ result = (result < 0) ? result : -EIO;
+ goto out;
+ }
+ }
+ out:
+ return result;
+}
+
+/*
+ * Exec Kernel system call: for obvious reasons only root may call it.
+ *
+ * This call breaks up into three pieces.
+ * - A generic part which loads the new kernel from the current
+ * address space, and very carefully places the data in the
+ * allocated pages.
+ *
+ * - A generic part that interacts with the kernel and tells all of
+ *   the devices to shut down, preventing on-going DMAs and placing
+ * the devices in a consistent state so a later kernel can
+ * reinitialize them.
+ *
+ * - A machine specific part that includes the syscall number
+ *   and then copies the image to its final destination and
+ *   jumps into the image at entry.
+ *
+ * kexec does not sync or unmount filesystems, so if you need
+ * that to happen you must do it yourself.
+ */
+struct kimage *kexec_image = NULL;
+
+asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
+ struct kexec_segment *segments, unsigned long flags)
+{
+ struct kimage *image;
+ int result;
+
+ /* We only trust the superuser with rebooting the system. */
+ if (!capable(CAP_SYS_BOOT))
+ return -EPERM;
+
+ /*
+ * In case we need just a little bit of special behavior for
+ * reboot on panic.
+ */
+ if (flags != 0)
+ return -EINVAL;
+
+ if (nr_segments > KEXEC_SEGMENT_MAX)
+ return -EINVAL;
+
+ image = NULL;
+ result = 0;
+
+ if (nr_segments > 0) {
+ unsigned long i;
+ result = kimage_alloc(&image, nr_segments, segments);
+ if (result) {
+ goto out;
+ }
+ result = machine_kexec_prepare(image);
+ if (result) {
+ goto out;
+ }
+ image->start = entry;
+ for (i = 0; i < nr_segments; i++) {
+ result = kimage_load_segment(image, &image->segment[i]);
+ if (result) {
+ goto out;
+ }
+ }
+ result = kimage_terminate(image);
+ if (result) {
+ goto out;
+ }
+ }
+
+ image = xchg(&kexec_image, image);
+
+ out:
+ kimage_free(image);
+ return result;
+}
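
For readers unfamiliar with the call sequence, here is a minimal, hypothetical userspace sketch of invoking the new syscall. The struct mirrors the kexec_segment fields the loader above dereferences (buf/bufsz/mem/memsz); the syscall number and the 1 MB load address are assumptions, not values taken from this patch, and a real loader such as kexec-tools does considerably more work:

/* Editor's sketch, not part of the patch.  283 is the __NR_kexec_load
 * number mainline later used on i386 and is only a placeholder here;
 * flags must be 0, per the check in sys_kexec_load above. */
#include <stddef.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_kexec_load
#define __NR_kexec_load 283	/* assumed value */
#endif

struct kexec_segment {	/* mirrors the kernel structure used above */
	void *buf;	/* source buffer in this process */
	size_t bufsz;	/* bytes to copy from buf */
	void *mem;	/* physical destination address */
	size_t memsz;	/* destination size; zero-padded past bufsz */
};

int main(void)
{
	static char image[4096];	/* stand-in for a real kernel image */
	struct kexec_segment seg = {
		.buf = image, .bufsz = sizeof(image),
		.mem = (void *)0x100000, .memsz = sizeof(image),
	};

	/* entry is the physical entry point; needs CAP_SYS_BOOT */
	if (syscall(__NR_kexec_load, 0x100000UL, 1UL, &seg, 0UL) < 0) {
		perror("kexec_load");
		return 1;
	}
	return 0;
}

Loading only stages the image in kexec_image; nothing runs until the reboot path further down is taken.
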
struct task_struct *t)
{
int error = -EINVAL;
- int user;
-
if (sig < 0 || sig > _NSIG)
return error;
-
- user = (!info ||
- (info != SEND_SIG_PRIV &&
- info != SEND_SIG_FORCED &&
- SI_FROMUSER(info)));
-
error = -EPERM;
- if (user && (sig != SIGCONT ||
- current->signal->session != t->signal->session)
+ if ((!info || ((unsigned long)info != 1 &&
+ (unsigned long)info != 2 && SI_FROMUSER(info)))
+ && ((sig != SIGCONT) ||
+ (current->signal->session != t->signal->session))
&& (current->euid ^ t->suid) && (current->euid ^ t->uid)
&& (current->uid ^ t->suid) && (current->uid ^ t->uid)
&& !capable(CAP_KILL))
return error;
-
- error = -ESRCH;
- if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT))
- return error;
-
return security_task_kill(t, info, sig);
}
unsigned long flags;
int ret;
+ if (!vx_check(vx_task_xid(p), VX_ADMIN|VX_WATCH|VX_IDENT))
+ return -ESRCH;
+
ret = check_kill_permission(sig, info, p);
if (!ret && sig && p->sighand) {
spin_lock_irqsave(&p->sighand->siglock, flags);
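
A side note on the permission test above: each (a ^ b) term is a branch-free spelling of a != b, so the long conjunction denies the signal only when the sender's euid/uid matches none of the target's uid/suid and CAP_KILL is absent. A two-assert editor's demonstration (not kernel code):

/* xor-as-inequality idiom, for illustration only */
#include <assert.h>

int main(void)
{
	unsigned int a = 1000, b = 1000, c = 500;

	assert((a ^ b) == 0);	/* equal ids: term is 0, check falls through */
	assert((a ^ c) != 0);	/* differing ids: term is nonzero (true) */
	return 0;
}
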
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/device.h>
#include <linux/times.h>
machine_restart(buffer);
break;
+#ifdef CONFIG_KEXEC
+ case LINUX_REBOOT_CMD_KEXEC:
+ {
+ struct kimage *image;
+		image = xchg(&kexec_image, NULL);
+ if (!image) {
+ unlock_kernel();
+ return -EINVAL;
+ }
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ system_state = SYSTEM_RESTART;
+ device_shutdown();
+ system_state = SYSTEM_BOOTING;
+ printk(KERN_EMERG "Starting new kernel\n");
+ machine_shutdown();
+ machine_kexec(image);
+ break;
+ }
+#endif
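
Once an image has been staged, this reboot case is what launches it. A hedged userspace sketch follows; the numeric value of LINUX_REBOOT_CMD_KEXEC does not appear in this hunk, so the constant below (0x45584543, ASCII "EXEC", the value mainline settled on) is an assumption:

/* Editor's sketch, not part of the patch.  kexec does not sync or
 * unmount filesystems (see the comment above sys_kexec_load), so do
 * that first.  Requires root; does not return on success. */
#include <unistd.h>
#include <sys/reboot.h>

#ifndef LINUX_REBOOT_CMD_KEXEC
#define LINUX_REBOOT_CMD_KEXEC 0x45584543	/* assumed value */
#endif

int main(void)
{
	sync();
	return reboot(LINUX_REBOOT_CMD_KEXEC);
}
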
#ifdef CONFIG_SOFTWARE_SUSPEND
case LINUX_REBOOT_CMD_SW_SUSPEND:
{
--- /dev/null
+crc32table.h
+gen_crc32table
+++ /dev/null
-/*
- * ip_conntrack_pptp.c - Version 2.0
- *
- * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * Limitations:
- * - We blindly assume that control connections are always
- * established in PNS->PAC direction. This is a violation
- *   of RFC 2637
- *
- * TODO: - finish support for multiple calls within one session
- * (needs expect reservations in newnat)
- * - testing of incoming PPTP calls
- *
- * Changes:
- * 2002-02-05 - Version 1.3
- * - Call ip_conntrack_unexpect_related() from
- * pptp_timeout_related() to destroy expectations in case
- * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
- * (Philip Craig <philipc@snapgear.com>)
- * - Add Version information at module loadtime
- * 2002-02-10 - Version 1.6
- * - move to C99 style initializers
- * - remove second expectation if first arrives
- * 2004-10-22 - Version 2.0
- * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
- * - fix lots of linear skb assumptions from Mandrake's port
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/lockhelp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_CT_PPTP_VERSION "2.0"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
-
-DECLARE_LOCK(ip_pptp_lock);
-
-#if 0
-#include "ip_conntrack_pptp_priv.h"
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define SECS *HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-#define PPTP_GRE_TIMEOUT (10 MINS)
-#define PPTP_GRE_STREAM_TIMEOUT (5 DAYS)
-
-static int pptp_expectfn(struct ip_conntrack *ct)
-{
- struct ip_conntrack *master;
- struct ip_conntrack_expect *exp;
-
- DEBUGP("increasing timeouts\n");
- /* increase timeout of GRE data channel conntrack entry */
- ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
- ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
-
- master = master_ct(ct);
- if (!master) {
- DEBUGP(" no master!!!\n");
- return 0;
- }
-
- exp = ct->master;
- if (!exp) {
- DEBUGP("no expectation!!\n");
- return 0;
- }
-
- DEBUGP("completing tuples with ct info\n");
- /* we can do this, since we're unconfirmed */
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key ==
- htonl(master->help.ct_pptp_info.pac_call_id)) {
- /* assume PNS->PAC */
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key =
- htonl(master->help.ct_pptp_info.pns_call_id);
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key =
- htonl(master->help.ct_pptp_info.pns_call_id);
- } else {
- /* assume PAC->PNS */
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key =
- htonl(master->help.ct_pptp_info.pac_call_id);
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key =
- htonl(master->help.ct_pptp_info.pac_call_id);
- }
-
- /* delete other expectation */
- if (exp->expected_list.next != &exp->expected_list) {
- struct ip_conntrack_expect *other_exp;
- struct list_head *cur_item, *next;
-
- for (cur_item = master->sibling_list.next;
- cur_item != &master->sibling_list; cur_item = next) {
- next = cur_item->next;
- other_exp = list_entry(cur_item,
- struct ip_conntrack_expect,
- expected_list);
- /* remove only if occurred at same sequence number */
- if (other_exp != exp && other_exp->seq == exp->seq) {
- DEBUGP("unexpecting other direction\n");
- ip_ct_gre_keymap_destroy(other_exp);
- ip_conntrack_unexpect_related(other_exp);
- }
- }
- }
-
- return 0;
-}
-
-/* timeout GRE data connections */
-static int pptp_timeout_related(struct ip_conntrack *ct)
-{
- struct list_head *cur_item, *next;
- struct ip_conntrack_expect *exp;
-
- /* FIXME: do we have to lock something ? */
- for (cur_item = ct->sibling_list.next;
- cur_item != &ct->sibling_list; cur_item = next) {
- next = cur_item->next;
- exp = list_entry(cur_item, struct ip_conntrack_expect,
- expected_list);
-
- ip_ct_gre_keymap_destroy(exp);
- if (!exp->sibling) {
- ip_conntrack_unexpect_related(exp);
- continue;
- }
-
- DEBUGP("setting timeout of conntrack %p to 0\n",
- exp->sibling);
- exp->sibling->proto.gre.timeout = 0;
- exp->sibling->proto.gre.stream_timeout = 0;
- /* refresh_acct will not modify counters if skb == NULL */
- ip_ct_refresh_acct(exp->sibling, 0, NULL, 0);
- }
-
- return 0;
-}
-
-/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
-static inline int
-exp_gre(struct ip_conntrack *master,
- u_int32_t seq,
- u_int16_t callid,
- u_int16_t peer_callid)
-{
- struct ip_conntrack_tuple inv_tuple;
- struct ip_conntrack_tuple exp_tuples[] = {
- /* tuple in original direction, PNS->PAC */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
- .u = { .gre = { .key = htonl(ntohs(peer_callid)) } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
- .u = { .gre = { .key = htonl(ntohs(callid)) } },
- .protonum = IPPROTO_GRE
- },
- },
- /* tuple in reply direction, PAC->PNS */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
- .u = { .gre = { .key = htonl(ntohs(callid)) } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
- .u = { .gre = { .key = htonl(ntohs(peer_callid)) } },
- .protonum = IPPROTO_GRE
- },
- }
- }, *exp_tuple;
-
- for (exp_tuple = exp_tuples; exp_tuple < &exp_tuples[2]; exp_tuple++) {
- struct ip_conntrack_expect *exp;
-
- exp = ip_conntrack_expect_alloc();
- if (exp == NULL)
- return 1;
-
- memcpy(&exp->tuple, exp_tuple, sizeof(exp->tuple));
-
- exp->mask.src.ip = 0xffffffff;
- exp->mask.src.u.all = 0;
- exp->mask.dst.u.all = 0;
- exp->mask.dst.u.gre.key = 0xffffffff;
- exp->mask.dst.ip = 0xffffffff;
- exp->mask.dst.protonum = 0xffff;
-
- exp->seq = seq;
- exp->expectfn = pptp_expectfn;
-
- exp->help.exp_pptp_info.pac_call_id = ntohs(callid);
- exp->help.exp_pptp_info.pns_call_id = ntohs(peer_callid);
-
- DEBUGP("calling expect_related ");
- DUMP_TUPLE_RAW(&exp->tuple);
-
- /* Add GRE keymap entries */
- if (ip_ct_gre_keymap_add(exp, &exp->tuple, 0) != 0) {
- kfree(exp);
- return 1;
- }
-
- invert_tuplepr(&inv_tuple, &exp->tuple);
- if (ip_ct_gre_keymap_add(exp, &inv_tuple, 1) != 0) {
- ip_ct_gre_keymap_destroy(exp);
- kfree(exp);
- return 1;
- }
-
- if (ip_conntrack_expect_related(exp, master) != 0) {
- ip_ct_gre_keymap_destroy(exp);
- kfree(exp);
- DEBUGP("cannot expect_related()\n");
- return 1;
- }
- }
-
- return 0;
-}
-
-static inline int
-pptp_inbound_pkt(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int ctlhoff,
- size_t datalen,
- struct ip_conntrack *ct)
-{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg, *cid, *pcid;
- u_int32_t seq;
-
- ctlh = skb_header_pointer(skb, ctlhoff, sizeof(_ctlh), &_ctlh);
- if (unlikely(!ctlh)) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
-
- reqlen = datalen - sizeof(struct pptp_pkt_hdr) - sizeof(_ctlh);
- pptpReq = skb_header_pointer(skb, ctlhoff+sizeof(struct pptp_pkt_hdr),
- reqlen, &_pptpReq);
- if (unlikely(!pptpReq)) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("inbound control message %s\n", strMName[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.srep)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* server confirms new control session */
- if (info->sstate < PPTP_SESSION_REQUESTED) {
- DEBUGP("%s without START_SESS_REQUEST\n",
- strMName[msg]);
- break;
- }
- if (pptpReq->srep.resultCode == PPTP_START_OK)
- info->sstate = PPTP_SESSION_CONFIRMED;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_STOP_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.strep)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* server confirms end of control session */
- if (info->sstate > PPTP_SESSION_STOPREQ) {
- DEBUGP("%s without STOP_SESS_REQUEST\n",
- strMName[msg]);
- break;
- }
- if (pptpReq->strep.resultCode == PPTP_STOP_OK)
- info->sstate = PPTP_SESSION_NONE;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_OUT_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.ocack)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* server accepted call, we now expect GRE frames */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", strMName[msg]);
- break;
- }
- if (info->cstate != PPTP_CALL_OUT_REQ &&
- info->cstate != PPTP_CALL_OUT_CONF) {
- DEBUGP("%s without OUTCALL_REQ\n", strMName[msg]);
- break;
- }
- if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
- info->cstate = PPTP_CALL_NONE;
- break;
- }
-
- cid = &pptpReq->ocack.callID;
- pcid = &pptpReq->ocack.peersCallID;
-
- info->pac_call_id = ntohs(*cid);
-
- if (htons(info->pns_call_id) != *pcid) {
- DEBUGP("%s for unknown callid %u\n",
- strMName[msg], ntohs(*pcid));
- break;
- }
-
- DEBUGP("%s, CID=%X, PCID=%X\n", strMName[msg],
- ntohs(*cid), ntohs(*pcid));
-
- info->cstate = PPTP_CALL_OUT_CONF;
-
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
- break;
-
- case PPTP_IN_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* server tells us about incoming call request */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", strMName[msg]);
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- DEBUGP("%s, PCID=%X\n", strMName[msg], ntohs(*pcid));
- info->cstate = PPTP_CALL_IN_REQ;
- info->pac_call_id = ntohs(*pcid);
- break;
-
- case PPTP_IN_CALL_CONNECT:
- if (reqlen < sizeof(_pptpReq.iccon)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* server tells us about incoming call established */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", strMName[msg]);
- break;
- }
- if (info->sstate != PPTP_CALL_IN_REP
- && info->sstate != PPTP_CALL_IN_CONF) {
- DEBUGP("%s but never sent IN_CALL_REPLY\n",
- strMName[msg]);
- break;
- }
-
- pcid = &pptpReq->iccon.peersCallID;
- cid = &info->pac_call_id;
-
- if (info->pns_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown CallID %u\n",
- strMName[msg], ntohs(*cid));
- break;
- }
-
- DEBUGP("%s, PCID=%X\n", strMName[msg], ntohs(*pcid));
- info->cstate = PPTP_CALL_IN_CONF;
-
- /* we expect a GRE connection from PAC to PNS */
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
-
- break;
-
- case PPTP_CALL_DISCONNECT_NOTIFY:
- if (reqlen < sizeof(_pptpReq.disc)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* server confirms disconnect */
- cid = &pptpReq->disc.callID;
- DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*cid));
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_timeout_related(ct);
- break;
-
- case PPTP_WAN_ERROR_NOTIFY:
- break;
-
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
- ? strMName[msg]:strMName[0], msg);
- break;
- }
-
- return NF_ACCEPT;
-
-}
-
-static inline int
-pptp_outbound_pkt(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int ctlhoff,
- size_t datalen,
- struct ip_conntrack *ct)
-{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg, *cid, *pcid;
-
- ctlh = skb_header_pointer(skb, ctlhoff, sizeof(_ctlh), &_ctlh);
- if (!ctlh)
- return NF_ACCEPT;
-
- reqlen = datalen - sizeof(struct pptp_pkt_hdr) - sizeof(_ctlh);
- pptpReq = skb_header_pointer(skb, ctlhoff+sizeof(_ctlh), reqlen,
- &_pptpReq);
- if (!pptpReq)
- return NF_ACCEPT;
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("outbound control message %s\n", strMName[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REQUEST:
- /* client requests for new control session */
- if (info->sstate != PPTP_SESSION_NONE) {
- DEBUGP("%s but we already have one",
- strMName[msg]);
- }
- info->sstate = PPTP_SESSION_REQUESTED;
- break;
- case PPTP_STOP_SESSION_REQUEST:
- /* client requests end of control session */
- info->sstate = PPTP_SESSION_STOPREQ;
- break;
-
- case PPTP_OUT_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.ocreq)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* client initiating connection to server */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n",
- strMName[msg]);
- break;
- }
- info->cstate = PPTP_CALL_OUT_REQ;
- /* track PNS call id */
- cid = &pptpReq->ocreq.callID;
- DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*cid));
- info->pns_call_id = ntohs(*cid);
- break;
- case PPTP_IN_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", strMName[msg]);
- break;
- }
-
- /* client answers incoming call */
- if (info->cstate != PPTP_CALL_IN_REQ
- && info->cstate != PPTP_CALL_IN_REP) {
- DEBUGP("%s without incall_req\n",
- strMName[msg]);
- break;
- }
- if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
- info->cstate = PPTP_CALL_NONE;
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- if (info->pac_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown call %u\n",
- strMName[msg], ntohs(*pcid));
- break;
- }
- DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*pcid));
- /* part two of the three-way handshake */
- info->cstate = PPTP_CALL_IN_REP;
- info->pns_call_id = ntohs(pptpReq->icack.callID);
- break;
-
- case PPTP_CALL_CLEAR_REQUEST:
- /* client requests hangup of call */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("CLEAR_CALL but no session\n");
- break;
- }
- /* FUTURE: iterate over all calls and check if
- * call ID is valid. We don't do this without newnat,
- * because we only know about last call */
- info->cstate = PPTP_CALL_CLEAR_REQ;
- break;
- case PPTP_SET_LINK_INFO:
- break;
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
- strMName[msg]:strMName[0], msg);
- /* unknown: no need to create GRE masq table entry */
- break;
- }
-
- return NF_ACCEPT;
-}
-
-
-/* track caller id inside control connection, call expect_related */
-static int
-conntrack_pptp_help(struct sk_buff *skb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-
-{
- struct pptp_pkt_hdr _pptph, *pptph;
-
- struct tcphdr _tcph, *tcph;
- u_int32_t tcplen = skb->len - skb->nh.iph->ihl * 4;
- u_int32_t datalen;
- void *datalimit;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- unsigned int nexthdr_off;
-
- int oldsstate, oldcstate;
- int ret;
-
- /* don't do any tracking before tcp handshake complete */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
- DEBUGP("ctinfo = %u, skipping\n", ctinfo);
- return NF_ACCEPT;
- }
-
- nexthdr_off = skb->nh.iph->ihl*4;
- tcph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_tcph),
- &_tcph);
- if (!tcph)
- return NF_ACCEPT;
-
- /* not a complete TCP header? */
- if (tcplen < sizeof(struct tcphdr) || tcplen < tcph->doff * 4) {
- DEBUGP("tcplen = %u\n", tcplen);
- return NF_ACCEPT;
- }
-
-
- datalen = tcplen - tcph->doff * 4;
-
- /* checksum invalid? */
- if (tcp_v4_check(tcph, tcplen, skb->nh.iph->saddr, skb->nh.iph->daddr,
- csum_partial((char *) tcph, tcplen, 0))) {
- printk(KERN_NOTICE __FILE__ ": bad csum\n");
- /* W2K PPTP server sends TCP packets with wrong checksum :(( */
- //return NF_ACCEPT;
- }
-
- if (tcph->fin || tcph->rst) {
- DEBUGP("RST/FIN received, timeouting GRE\n");
- /* can't do this after real newnat */
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_timeout_related(ct);
- }
-
- nexthdr_off += tcph->doff*4;
- pptph = skb_header_pointer(skb, skb->nh.iph->ihl*4 + tcph->doff*4,
- sizeof(_pptph), &_pptph);
- if (!pptph) {
- DEBUGP("no full PPTP header, can't track\n");
- return NF_ACCEPT;
- }
-
- datalimit = (void *) pptph + datalen;
-
- /* if it's not a control message we can't do anything with it */
- if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
- ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
- DEBUGP("not a control packet\n");
- return NF_ACCEPT;
- }
-
- oldsstate = info->sstate;
- oldcstate = info->cstate;
-
- LOCK_BH(&ip_pptp_lock);
-
- nexthdr_off += sizeof(_pptph);
- /* FIXME: We just blindly assume that the control connection is always
- * established from PNS->PAC. However, RFC makes no guarantee */
- if (dir == IP_CT_DIR_ORIGINAL)
- /* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(skb, tcph, nexthdr_off, datalen, ct);
- else
- /* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(skb, tcph, nexthdr_off, datalen, ct);
- DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
- oldsstate, info->sstate, oldcstate, info->cstate);
- UNLOCK_BH(&ip_pptp_lock);
-
- return ret;
-}
-
-/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
- .list = { NULL, NULL },
- .name = "pptp",
- .flags = IP_CT_HELPER_F_REUSE_EXPECT,
- .me = THIS_MODULE,
- .max_expected = 2,
- .timeout = 0,
- .tuple = { .src = { .ip = 0,
- .u = { .tcp = { .port =
- __constant_htons(PPTP_CONTROL_PORT) } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = IPPROTO_TCP
- }
- },
- .mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = 0xffff } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = 0xffff
- }
- },
- .help = conntrack_pptp_help
-};
-
-/* ip_conntrack_pptp initialization */
-static int __init init(void)
-{
- int retcode;
-
- DEBUGP(__FILE__ ": registering helper\n");
- if ((retcode = ip_conntrack_helper_register(&pptp))) {
- printk(KERN_ERR "Unable to register conntrack application "
- "helper for pptp: %d\n", retcode);
- return -EIO;
- }
-
- printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit fini(void)
-{
- ip_conntrack_helper_unregister(&pptp);
- printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
-}
-
-module_init(init);
-module_exit(fini);
-
-EXPORT_SYMBOL(ip_pptp_lock);
+++ /dev/null
-#ifndef _IP_CT_PPTP_PRIV_H
-#define _IP_CT_PPTP_PRIV_H
-
-/* PptpControlMessageType names */
-static const char *strMName[] = {
- "UNKNOWN_MESSAGE",
- "START_SESSION_REQUEST",
- "START_SESSION_REPLY",
- "STOP_SESSION_REQUEST",
- "STOP_SESSION_REPLY",
- "ECHO_REQUEST",
- "ECHO_REPLY",
- "OUT_CALL_REQUEST",
- "OUT_CALL_REPLY",
- "IN_CALL_REQUEST",
- "IN_CALL_REPLY",
- "IN_CALL_CONNECT",
- "CALL_CLEAR_REQUEST",
- "CALL_DISCONNECT_NOTIFY",
- "WAN_ERROR_NOTIFY",
- "SET_LINK_INFO"
-};
-
-#endif
+++ /dev/null
-/*
- * ip_conntrack_proto_gre.c - Version 2.0
- *
- * Connection tracking protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2004 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/list.h>
-
-#include <linux/netfilter_ipv4/lockhelp.h>
-
-DECLARE_RWLOCK(ip_ct_gre_lock);
-#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_ct_gre_lock)
-#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_ct_gre_lock)
-
-#include <linux/netfilter_ipv4/listhelp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
-
-/* shamelessly stolen from ip_conntrack_proto_udp.c */
-#define GRE_TIMEOUT (30*HZ)
-#define GRE_STREAM_TIMEOUT (180*HZ)
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
- NIPQUAD((x)->src.ip), ntohl((x)->src.u.gre.key), \
- NIPQUAD((x)->dst.ip), ntohl((x)->dst.u.gre.key))
-#else
-#define DEBUGP(x, args...)
-#define DUMP_TUPLE_GRE(x)
-#endif
-
-/* GRE KEYMAP HANDLING FUNCTIONS */
-static LIST_HEAD(gre_keymap_list);
-
-static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
- const struct ip_conntrack_tuple *t)
-{
- return ((km->tuple.src.ip == t->src.ip) &&
- (km->tuple.dst.ip == t->dst.ip) &&
- (km->tuple.dst.protonum == t->dst.protonum) &&
- (km->tuple.dst.u.all == t->dst.u.all));
-}
-
-/* look up the source key for a given tuple */
-static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t)
-{
- struct ip_ct_gre_keymap *km;
- u_int32_t key;
-
- READ_LOCK(&ip_ct_gre_lock);
- km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
- struct ip_ct_gre_keymap *, t);
- if (!km) {
- READ_UNLOCK(&ip_ct_gre_lock);
- return 0;
- }
-
- key = km->tuple.src.u.gre.key;
- READ_UNLOCK(&ip_ct_gre_lock);
-
- return key;
-}
-
-/* add a single keymap entry, associate with specified expect */
-int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp,
- struct ip_conntrack_tuple *t, int reply)
-{
- struct ip_ct_gre_keymap *km;
-
- km = kmalloc(sizeof(*km), GFP_ATOMIC);
- if (!km)
- return -1;
-
- /* initializing list head should be sufficient */
- memset(km, 0, sizeof(*km));
-
- memcpy(&km->tuple, t, sizeof(*t));
-
- if (!reply)
- exp->proto.gre.keymap_orig = km;
- else
- exp->proto.gre.keymap_reply = km;
-
- DEBUGP("adding new entry %p: ", km);
- DUMP_TUPLE_GRE(&km->tuple);
-
- WRITE_LOCK(&ip_ct_gre_lock);
- list_append(&gre_keymap_list, km);
- WRITE_UNLOCK(&ip_ct_gre_lock);
-
- return 0;
-}
-
-/* change the tuple of a keymap entry (used by nat helper) */
-void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km,
- struct ip_conntrack_tuple *t)
-{
- if (!km)
- {
- printk(KERN_WARNING
- "NULL GRE conntrack keymap change requested\n");
- return;
- }
-
- DEBUGP("changing entry %p to: ", km);
- DUMP_TUPLE_GRE(t);
-
- WRITE_LOCK(&ip_ct_gre_lock);
- memcpy(&km->tuple, t, sizeof(km->tuple));
- WRITE_UNLOCK(&ip_ct_gre_lock);
-}
-
-/* destroy the keymap entries associated with specified expect */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp)
-{
- DEBUGP("entering for exp %p\n", exp);
- WRITE_LOCK(&ip_ct_gre_lock);
- if (exp->proto.gre.keymap_orig) {
- DEBUGP("removing %p from list\n", exp->proto.gre.keymap_orig);
- list_del(&exp->proto.gre.keymap_orig->list);
- kfree(exp->proto.gre.keymap_orig);
- exp->proto.gre.keymap_orig = NULL;
- }
- if (exp->proto.gre.keymap_reply) {
- DEBUGP("removing %p from list\n", exp->proto.gre.keymap_reply);
- list_del(&exp->proto.gre.keymap_reply->list);
- kfree(exp->proto.gre.keymap_reply);
- exp->proto.gre.keymap_reply = NULL;
- }
- WRITE_UNLOCK(&ip_ct_gre_lock);
-}
-
-
-/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
-
-/* invert gre part of tuple */
-static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->dst.u.gre.key = orig->src.u.gre.key;
- tuple->src.u.gre.key = orig->dst.u.gre.key;
-
- return 1;
-}
-
-/* gre hdr info to tuple */
-static int gre_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct gre_hdr _grehdr, *grehdr;
- struct gre_hdr_pptp _pgrehdr, *pgrehdr;
- u_int32_t srckey;
-
- grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
- /* PPTP header is variable length, only need up to the call_id field */
- pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
-
- if (!grehdr || !pgrehdr)
- return 0;
-
- switch (grehdr->version) {
- case GRE_VERSION_1701:
- if (!grehdr->key) {
- DEBUGP("Can't track GRE without key\n");
- return 0;
- }
- tuple->dst.u.gre.key = *(gre_key(grehdr));
- break;
-
- case GRE_VERSION_PPTP:
- if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
- DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
- return 0;
- }
- tuple->dst.u.gre.key = htonl(ntohs(pgrehdr->call_id));
- break;
-
- default:
- printk(KERN_WARNING "unknown GRE version %hu\n",
- grehdr->version);
- return 0;
- }
-
- srckey = gre_keymap_lookup(tuple);
-
- tuple->src.u.gre.key = srckey;
-#if 0
- DEBUGP("found src key %x for tuple ", ntohl(srckey));
- DUMP_TUPLE_GRE(tuple);
-#endif
-
- return 1;
-}
-
-/* print gre part of tuple */
-static unsigned int gre_print_tuple(char *buffer,
- const struct ip_conntrack_tuple *tuple)
-{
- return sprintf(buffer, "srckey=0x%x dstkey=0x%x ",
- ntohl(tuple->src.u.gre.key),
- ntohl(tuple->dst.u.gre.key));
-}
-
-/* print private data for conntrack */
-static unsigned int gre_print_conntrack(char *buffer,
- const struct ip_conntrack *ct)
-{
- return sprintf(buffer, "timeout=%u, stream_timeout=%u ",
- (ct->proto.gre.timeout / HZ),
- (ct->proto.gre.stream_timeout / HZ));
-}
-
-/* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct ip_conntrack *ct,
- const struct sk_buff *skb,
- enum ip_conntrack_info conntrackinfo)
-{
- /* If we've seen traffic both ways, this is a GRE connection.
- * Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.stream_timeout);
- /* Also, more likely to be important, and not a probe. */
- set_bit(IPS_ASSURED_BIT, &ct->status);
- } else
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int gre_new(struct ip_conntrack *ct,
- const struct sk_buff *skb)
-{
- DEBUGP(": ");
- DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
- /* initialize to sane value. Ideally a conntrack helper
- * (e.g. in case of pptp) is increasing them */
- ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
- ct->proto.gre.timeout = GRE_TIMEOUT;
-
- return 1;
-}
-
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct ip_conntrack *ct)
-{
- struct ip_conntrack_expect *master = ct->master;
-
- DEBUGP(" entering\n");
-
- if (!master) {
- DEBUGP("no master exp for ct %p\n", ct);
- return;
- }
-
- ip_ct_gre_keymap_destroy(master);
-}
-
-/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
- .proto = IPPROTO_GRE,
- .name = "gre",
- .pkt_to_tuple = gre_pkt_to_tuple,
- .invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
- .print_conntrack = gre_print_conntrack,
- .packet = gre_packet,
- .new = gre_new,
- .destroy = gre_destroy,
- .exp_matches_pkt = NULL,
- .me = THIS_MODULE
-};
-
-/* ip_conntrack_proto_gre initialization */
-static int __init init(void)
-{
- int retcode;
-
- if ((retcode = ip_conntrack_protocol_register(&gre))) {
- printk(KERN_ERR "Unable to register conntrack protocol "
- "helper for gre: %d\n", retcode);
- return -EIO;
- }
-
- return 0;
-}
-
-static void __exit fini(void)
-{
- struct list_head *pos, *n;
-
- /* delete all keymap entries */
- WRITE_LOCK(&ip_ct_gre_lock);
- list_for_each_safe(pos, n, &gre_keymap_list) {
- DEBUGP("deleting keymap %p at module unload time\n", pos);
- list_del(pos);
- kfree(pos);
- }
- WRITE_UNLOCK(&ip_ct_gre_lock);
-
- ip_conntrack_protocol_unregister(&gre);
-}
-
-EXPORT_SYMBOL(ip_ct_gre_keymap_add);
-EXPORT_SYMBOL(ip_ct_gre_keymap_change);
-EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
-
-module_init(init);
-module_exit(fini);
+++ /dev/null
-/*
- * ip_nat_pptp.c - Version 2.0
- *
- * NAT support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2004 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * TODO: - Support for multiple calls within one session
- * (needs netfilter newnat code)
- * - NAT to a unique tuple, not to TCP source port
- * (needs netfilter tuple reservation)
- *
- * Changes:
- * 2002-02-10 - Version 1.3
- * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
- * in local connections (Philip Craig <philipc@snapgear.com>)
- * - add checks for magicCookie and pptp version
- * - make argument list of pptp_{out,in}bound_packet() shorter
- * - move to C99 style initializers
- * - print version number at module loadtime
- * 2003-09-22 - Version 1.5
- * - use SNATed tcp sourceport as callid, since we get called before
- * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
- * 2004-10-22 - Version 2.0
- * - kernel 2.6.x version
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_pptp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_NAT_PPTP_VERSION "2.0"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
-
-
-#if 0
-#include "ip_conntrack_pptp_priv.h"
-#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \
- ": " format, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static unsigned int
-pptp_nat_expected(struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- struct ip_conntrack *master = master_ct(ct);
- struct ip_nat_multi_range mr;
- struct ip_ct_pptp_master *ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info;
- u_int32_t newip, newcid;
- int ret;
-
- IP_NF_ASSERT(info);
- IP_NF_ASSERT(master);
- IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
-
- DEBUGP("we have a connection!\n");
-
- LOCK_BH(&ip_pptp_lock);
- ct_pptp_info = &master->help.ct_pptp_info;
- nat_pptp_info = &master->nat.help.nat_pptp_info;
-
- /* need to alter GRE tuple because conntrack expectfn() used 'wrong'
- * (unmanipulated) values */
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
- DEBUGP("completing tuples with NAT info \n");
- /* we can do this, since we're unconfirmed */
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key ==
- htonl(ct_pptp_info->pac_call_id)) {
- /* assume PNS->PAC */
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key =
- htonl(nat_pptp_info->pns_call_id);
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key =
- htonl(nat_pptp_info->pns_call_id);
- newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- newcid = htonl(nat_pptp_info->pac_call_id);
- } else {
- /* assume PAC->PNS */
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key =
- htonl(nat_pptp_info->pac_call_id);
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key =
- htonl(nat_pptp_info->pac_call_id);
- newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- newcid = htonl(nat_pptp_info->pns_call_id);
- }
- } else {
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key ==
- htonl(ct_pptp_info->pac_call_id)) {
- /* assume PNS->PAC */
- newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- newcid = htonl(ct_pptp_info->pns_call_id);
- }
- else {
- /* assume PAC->PNS */
- newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- newcid = htonl(ct_pptp_info->pac_call_id);
- }
- }
-
- mr.rangesize = 1;
- mr.range[0].flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED;
- mr.range[0].min_ip = mr.range[0].max_ip = newip;
- mr.range[0].min = mr.range[0].max =
- ((union ip_conntrack_manip_proto ) { newcid });
- DEBUGP("change ip to %u.%u.%u.%u\n",
- NIPQUAD(newip));
- DEBUGP("change key to 0x%x\n", ntohl(newcid));
- ret = ip_nat_setup_info(ct, &mr, hooknum);
-
- UNLOCK_BH(&ip_pptp_lock);
-
- return ret;
-
-}
-
-/* outbound packets == from PNS to PAC */
-static inline unsigned int
-pptp_outbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp)
-
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct tcphdr *tcph = (void *) iph + iph->ihl*4;
- struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *)
- ((void *)tcph + tcph->doff*4);
-
- struct PptpControlHeader *ctlh;
- union pptp_ctrl_union *pptpReq;
- struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
- u_int16_t msg, *cid = NULL, new_callid;
-
- /* FIXME: size checks !!! */
- ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph));
- pptpReq = (void *) ((void *) ctlh + sizeof(*ctlh));
-
- new_callid = htons(ct_pptp_info->pns_call_id);
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REQUEST:
- cid = &pptpReq->ocreq.callID;
- /* FIXME: ideally we would want to reserve a call ID
- * here. current netfilter NAT core is not able to do
- * this :( For now we use TCP source port. This breaks
- * multiple calls within one control session */
-
- /* save original call ID in nat_info */
- nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
- /* don't use tcph->source since we are at a DSTmanip
- * hook (e.g. PREROUTING) and pkt is not mangled yet */
- new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
- /* save new call ID in ct info */
- ct_pptp_info->pns_call_id = ntohs(new_callid);
- break;
- case PPTP_IN_CALL_REPLY:
- cid = &pptpReq->icreq.callID;
- break;
- case PPTP_CALL_CLEAR_REQUEST:
- cid = &pptpReq->clrreq.callID;
- break;
- default:
- DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
- (msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]);
- /* fall through */
-
- case PPTP_SET_LINK_INFO:
- /* only need to NAT in case PAC is behind NAT box */
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- IP_NF_ASSERT(cid);
-
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(*cid), ntohs(new_callid));
-
- /* mangle packet */
- ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)cid - (void *)pptph,
- sizeof(new_callid), (char *)&new_callid,
- sizeof(new_callid));
-
- return NF_ACCEPT;
-}
-
-/* inbound packets == from PAC to PNS */
-static inline unsigned int
-pptp_inbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *oldexp)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct tcphdr *tcph = (void *) iph + iph->ihl*4;
- struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *)
- ((void *)tcph + tcph->doff*4);
-
- struct PptpControlHeader *ctlh;
- union pptp_ctrl_union *pptpReq;
- struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
- u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
- u_int32_t old_dst_ip;
-
- struct ip_conntrack_tuple t, inv_t;
- struct ip_conntrack_tuple *orig_t, *reply_t;
-
- /* FIXME: size checks !!! */
- ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph));
- pptpReq = (void *) ((void *) ctlh + sizeof(*ctlh));
-
- new_pcid = htons(nat_pptp_info->pns_call_id);
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REPLY:
- pcid = &pptpReq->ocack.peersCallID;
- cid = &pptpReq->ocack.callID;
- if (!oldexp) {
- DEBUGP("outcall but no expectation\n");
- break;
- }
- old_dst_ip = oldexp->tuple.dst.ip;
- t = oldexp->tuple;
- invert_tuplepr(&inv_t, &t);
-
- /* save original PAC call ID in nat_info */
- nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
-
- /* alter expectation */
- orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- if (t.src.ip == orig_t->src.ip && t.dst.ip == orig_t->dst.ip) {
- /* expectation for PNS->PAC direction */
- t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id);
- t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id);
- inv_t.src.ip = reply_t->src.ip;
- inv_t.dst.ip = reply_t->dst.ip;
- inv_t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id);
- inv_t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id);
- } else {
- /* expectation for PAC->PNS direction */
- t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id);
- t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id);
- inv_t.src.ip = orig_t->src.ip;
- inv_t.dst.ip = orig_t->dst.ip;
- inv_t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id);
- inv_t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id);
- }
-
- if (!ip_conntrack_change_expect(oldexp, &t)) {
- DEBUGP("successfully changed expect\n");
- } else {
- DEBUGP("can't change expect\n");
- }
- ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_orig, &t);
- ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_reply, &inv_t);
- break;
- case PPTP_IN_CALL_CONNECT:
- pcid = &pptpReq->iccon.peersCallID;
- if (!oldexp)
- break;
- old_dst_ip = oldexp->tuple.dst.ip;
- t = oldexp->tuple;
-
- /* alter expectation, no need for callID */
- if (t.dst.ip == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip) {
- /* expectation for PNS->PAC direction */
- t.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- } else {
- /* expectation for PAC->PNS direction */
- t.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- }
-
- if (!ip_conntrack_change_expect(oldexp, &t)) {
- DEBUGP("successfully changed expect\n");
- } else {
- DEBUGP("can't change expect\n");
- }
- break;
- case PPTP_IN_CALL_REQUEST:
- /* only need to nat in case PAC is behind NAT box */
- break;
- case PPTP_WAN_ERROR_NOTIFY:
- pcid = &pptpReq->wanerr.peersCallID;
- break;
- case PPTP_CALL_DISCONNECT_NOTIFY:
- pcid = &pptpReq->disc.callID;
- break;
-
- default:
- DEBUGP("unknown inbound packet %s\n",
- (msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]);
- /* fall through */
-
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- /* mangle packet */
- IP_NF_ASSERT(pcid);
- DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
- ntohs(*pcid), ntohs(new_pcid));
- ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)pcid - (void *)pptph,
- sizeof(new_pcid), (char *)&new_pcid,
- sizeof(new_pcid));
-
- if (new_cid) {
- IP_NF_ASSERT(cid);
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(*cid), ntohs(new_cid));
- ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- (void *)cid - (void *)pptph,
- sizeof(new_cid), (char *)&new_cid,
- sizeof(new_cid));
- }
-
- /* great, at least we don't need to resize packets */
- return NF_ACCEPT;
-}
-
-
-static unsigned int tcp_help(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp,
- struct ip_nat_info *info,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum, struct sk_buff **pskb)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct tcphdr *tcph = (void *) iph + iph->ihl*4;
- unsigned int datalen = (*pskb)->len - iph->ihl*4 - tcph->doff*4;
- struct pptp_pkt_hdr *pptph;
-
- int dir;
-
- DEBUGP("entering\n");
-
- /* Only mangle things once: DST for original direction
- and SRC for reply direction. */
- dir = CTINFO2DIR(ctinfo);
- if (!((HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- && dir == IP_CT_DIR_ORIGINAL)
- || (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST
- && dir == IP_CT_DIR_REPLY))) {
- DEBUGP("Not touching dir %s at hook %s\n",
- dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY",
- hooknum == NF_IP_POST_ROUTING ? "POSTROUTING"
- : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING"
- : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT"
- : hooknum == NF_IP_LOCAL_IN ? "INPUT" : "???");
- return NF_ACCEPT;
- }
-
- /* if packet is too small, just skip it */
- if (datalen < sizeof(struct pptp_pkt_hdr)+
- sizeof(struct PptpControlHeader)) {
- DEBUGP("pptp packet too short\n");
- return NF_ACCEPT;
- }
-
- pptph = (struct pptp_pkt_hdr *) ((void *)tcph + tcph->doff*4);
-
- /* if it's not a control message, we can't handle it */
- if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
- ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
- DEBUGP("not a pptp control packet\n");
- return NF_ACCEPT;
- }
-
- LOCK_BH(&ip_pptp_lock);
-
- if (dir == IP_CT_DIR_ORIGINAL) {
-		/* requests sent by client to server (PNS->PAC) */
- pptp_outbound_pkt(pskb, ct, ctinfo, exp);
- } else {
- /* response from the server to the client (PAC->PNS) */
- pptp_inbound_pkt(pskb, ct, ctinfo, exp);
- }
-
- UNLOCK_BH(&ip_pptp_lock);
-
- return NF_ACCEPT;
-}
-
-/* nat helper struct for control connection */
-static struct ip_nat_helper pptp_tcp_helper = {
- .list = { NULL, NULL },
- .name = "pptp",
- .flags = IP_NAT_HELPER_F_ALWAYS,
- .me = THIS_MODULE,
- .tuple = { .src = { .ip = 0,
- .u = { .tcp = { .port =
- __constant_htons(PPTP_CONTROL_PORT) }
- }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = IPPROTO_TCP
- }
- },
-
- .mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = 0xFFFF } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = 0xFFFF
- }
- },
- .help = tcp_help,
- .expect = pptp_nat_expected
-};
-
-
-static int __init init(void)
-{
- DEBUGP("%s: registering NAT helper\n", __FILE__);
- if (ip_nat_helper_register(&pptp_tcp_helper)) {
- printk(KERN_ERR "Unable to register NAT application helper "
- "for pptp\n");
- return -EIO;
- }
-
- printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit fini(void)
-{
- DEBUGP("cleanup_module\n" );
- ip_nat_helper_unregister(&pptp_tcp_helper);
- printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
-}
-
-module_init(init);
-module_exit(fini);
+++ /dev/null
-/*
- * ip_nat_proto_gre.c - Version 2.0
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2004 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \
- ": " format, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* is key in given range between min and max */
-static int
-gre_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- u_int32_t key;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- key = tuple->src.u.gre.key;
- else
- key = tuple->dst.u.gre.key;
-
- return ntohl(key) >= ntohl(min->gre.key)
- && ntohl(key) <= ntohl(max->gre.key);
-}
-
-/* generate unique tuple ... */
-static int
-gre_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- u_int32_t min, i, range_size;
- u_int32_t key = 0, *keyptr;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- keyptr = &tuple->src.u.gre.key;
- else
- keyptr = &tuple->dst.u.gre.key;
-
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- DEBUGP("%p: NATing GRE PPTP\n", conntrack);
- min = 1;
- range_size = 0xffff;
- } else {
- min = ntohl(range->min.gre.key);
- range_size = ntohl(range->max.gre.key) - min + 1;
- }
-
- DEBUGP("min = %u, range_size = %u\n", min, range_size);
-
- for (i = 0; i < range_size; i++, key++) {
- *keyptr = htonl(min + key % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
-
- DEBUGP("%p: no NAT mapping\n", conntrack);
-
- return 0;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static int
-gre_manip_pkt(struct sk_buff **pskb,
- unsigned int hdroff,
- const struct ip_conntrack_manip *manip,
- enum ip_nat_manip_type maniptype)
-{
- struct gre_hdr *greh;
- struct gre_hdr_pptp *pgreh;
-
- if (!skb_ip_make_writable(pskb, hdroff + sizeof(*pgreh)))
- return 0;
-
- greh = (void *)(*pskb)->data + hdroff;
- pgreh = (struct gre_hdr_pptp *) greh;
-
- /* we only have destination manip of a packet, since 'source key'
- * is not present in the packet itself */
- if (maniptype == IP_NAT_MANIP_DST) {
- /* key manipulation is always dest */
- switch (greh->version) {
- case 0:
- if (!greh->key) {
- DEBUGP("can't nat GRE w/o key\n");
- break;
- }
- if (greh->csum) {
- /* FIXME: Never tested this code... */
- *(gre_csum(greh)) =
- ip_nat_cheat_check(~*(gre_key(greh)),
- manip->u.gre.key,
- *(gre_csum(greh)));
- }
- *(gre_key(greh)) = manip->u.gre.key;
- break;
- case GRE_VERSION_PPTP:
- DEBUGP("call_id -> 0x%04x\n",
- ntohl(manip->u.gre.key));
- pgreh->call_id = htons(ntohl(manip->u.gre.key));
- break;
- default:
- DEBUGP("can't nat unknown GRE version\n");
- return 0;
- break;
- }
- }
- return 1;
-}
-
-/* print out a nat tuple */
-static unsigned int
-gre_print(char *buffer,
- const struct ip_conntrack_tuple *match,
- const struct ip_conntrack_tuple *mask)
-{
- unsigned int len = 0;
-
- if (mask->src.u.gre.key)
- len += sprintf(buffer + len, "srckey=0x%x ",
- ntohl(match->src.u.gre.key));
-
- if (mask->dst.u.gre.key)
- len += sprintf(buffer + len, "dstkey=0x%x ",
- ntohl(match->src.u.gre.key));
-
- return len;
-}
-
-/* print a range of keys */
-static unsigned int
-gre_print_range(char *buffer, const struct ip_nat_range *range)
-{
- if (range->min.gre.key != 0
- || range->max.gre.key != 0xFFFF) {
- if (range->min.gre.key == range->max.gre.key)
- return sprintf(buffer, "key 0x%x ",
- ntohl(range->min.gre.key));
- else
- return sprintf(buffer, "keys 0x%u-0x%u ",
- ntohl(range->min.gre.key),
- ntohl(range->max.gre.key));
- } else
- return 0;
-}
-
-/* nat helper struct */
-static struct ip_nat_protocol gre = {
- .name = "GRE",
- .protonum = IPPROTO_GRE,
- .manip_pkt = gre_manip_pkt,
- .in_range = gre_in_range,
- .unique_tuple = gre_unique_tuple,
- .print = gre_print,
- .print_range = gre_print_range
-};
-
-static int __init init(void)
-{
- if (ip_nat_protocol_register(&gre))
- return -EIO;
-
- return 0;
-}
-
-static void __exit fini(void)
-{
- ip_nat_protocol_unregister(&gre);
-}
-
-module_init(init);
-module_exit(fini);
return 0;
}
+/* XXX (mef) need to generalize the IPOD stuff. Right now I am borrowing
+ from the ICMP infrastructure. */
+#ifdef CONFIG_ICMP_IPOD
+#include <linux/reboot.h>
+
+extern int sysctl_icmp_ipod_version;
+extern int sysctl_icmp_ipod_enabled;
+extern u32 sysctl_icmp_ipod_host;
+extern u32 sysctl_icmp_ipod_mask;
+extern char sysctl_icmp_ipod_key[32+1];
+#define IPOD_CHECK_KEY \
+ (sysctl_icmp_ipod_key[0] != 0)
+#define IPOD_VALID_KEY(d) \
+ (strncmp(sysctl_icmp_ipod_key, (char *)(d), strlen(sysctl_icmp_ipod_key)) == 0)
+
+static void udp_ping_of_death(struct sk_buff *skb, struct udphdr *uh, u32 saddr)
+{
+ int doit = 0;
+
+ /*
+ * If IPOD not enabled or wrong UDP IPOD port, ignore.
+ */
+ if (!sysctl_icmp_ipod_enabled || (ntohs(uh->dest) != 664))
+ return;
+
+#if 0
+ printk(KERN_INFO "IPOD: got udp pod request, host=%u.%u.%u.%u\n", NIPQUAD(saddr));
+#endif
+
+ /*
+ * First check the source address info.
+ * If host not set, ignore.
+ */
+ if (sysctl_icmp_ipod_host != 0xffffffff &&
+ (ntohl(saddr) & sysctl_icmp_ipod_mask) == sysctl_icmp_ipod_host) {
+ /*
+ * Now check the key if enabled.
+ * If packet doesn't contain enough data or key
+ * is otherwise invalid, ignore.
+ */
+ if (IPOD_CHECK_KEY) {
+ if (pskb_may_pull(skb, sizeof(sysctl_icmp_ipod_key)+sizeof(struct udphdr)-1)){
+#if 0
+ int i;
+ for (i=0;i<32+1;i++){
+ printk("%c",((char*)skb->data)[i+sizeof(struct udphdr)]);
+ }
+ printk("\n");
+#endif
+ if (IPOD_VALID_KEY(skb->data+sizeof(struct udphdr)))
+ doit = 1;
+ }
+ } else {
+ doit = 1;
+ }
+ }
+ if (doit) {
+ sysctl_icmp_ipod_enabled = 0;
+ printk(KERN_CRIT "IPOD: reboot forced by %u.%u.%u.%u...\n",
+ NIPQUAD(saddr));
+ machine_restart(NULL);
+ } else {
+ printk(KERN_WARNING "IPOD: from %u.%u.%u.%u rejected\n",
+ NIPQUAD(saddr));
+ }
+}
+#endif
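
To make the matching rules above concrete: a valid trigger is a UDP datagram to port 664 whose source address satisfies the icmp_ipod_host/mask sysctls and whose payload, when a key is configured, begins with the key string. Note that the pskb_may_pull() call insists on 32 bytes of payload past the UDP header, so short keys must be padded. The address and key in this editor's sketch are placeholders, not values from the patch:

/* Hypothetical IPOD trigger sender; for illustration only. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	char payload[32] = "example-key";	/* key at offset 0, zero padded */
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(664),	/* the fixed IPOD port */
	};
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	inet_pton(AF_INET, "10.0.0.1", &dst.sin_addr);	/* placeholder */
	sendto(fd, payload, sizeof(payload), 0,
	       (struct sockaddr *)&dst, sizeof(dst));
	close(fd);
	return 0;
}
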
+
/*
* All we need to do is get the socket, and then do a checksum.
*/
if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
+#ifdef CONFIG_ICMP_IPOD
+ udp_ping_of_death(skb, uh, saddr);
+#endif
+
sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
if (sk != NULL) {
--- /dev/null
+bin2c
+conmakehash
+kallsyms
+pnmtologo
--- /dev/null
+docproc
+fixdep
+split-include
--- /dev/null
+conf
+lex.zconf.c
+mconf
+zconf.tab.c
+zconf.tab.h
%define kversion 2.6.%{sublevel}
%define rpmversion 2.6.%{sublevel}
%define rhbsys %([ -r /etc/beehive-root ] && echo || echo .`whoami`)
-%define release 1.521.2.6.planetlab%{?date:.%{date}}
+%define release 1.521.3.planetlab%{?date:.%{date}}
%define signmodules 0
%define KVERREL %{PACKAGE_VERSION}-%{PACKAGE_RELEASE}
--- /dev/null
+elfconfig.h
+mk_elfconfig
+modpost
--- /dev/null
+gen_init_cpio
+initramfs_data.cpio
+initramfs_data.cpio.gz