diff -Nurb linux-2.6.27-720/drivers/input/serio/i8042-ppcio.h linux-2.6.27-710/drivers/input/serio/i8042-ppcio.h --- linux-2.6.27-720/drivers/input/serio/i8042-ppcio.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/drivers/input/serio/i8042-ppcio.h 2008-10-09 18:13:53.000000000 -0400 @@ -77,7 +77,7 @@ asm volatile("lis 7,0xff88 \n\ lswi 6,7,0x8 \n\ mr %0,6" - : "=r" (kbd_data) : : "6", "7"); + : "=r" (kbd_data) :: "6", "7"); __raw_writel(0x00000000, 0xff50000c); eieio(); @@ -99,7 +99,7 @@ ori 7,7,0x8 \n\ lswi 6,7,0x8 \n\ mr %0,6" - : "=r" (kbd_status) : : "6", "7"); + : "=r" (kbd_status) :: "6", "7"); __raw_writel(0x00000000, 0xff50000c); eieio(); diff -Nurb linux-2.6.27-720/drivers/net/wireless/rayctl.h linux-2.6.27-710/drivers/net/wireless/rayctl.h --- linux-2.6.27-720/drivers/net/wireless/rayctl.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/drivers/net/wireless/rayctl.h 2008-10-09 18:13:53.000000000 -0400 @@ -418,7 +418,9 @@ }; /****** Host-to-ECF Data Area at Shared RAM offset 0x200 *********************/ -EMPTY_STRUCT_DECL(host_to_ecf_area); +struct host_to_ecf_area { + +}; /****** ECF-to-Host Data Area at Shared RAM offset 0x0300 ********************/ struct startup_res_518 { diff -Nurb linux-2.6.27-720/drivers/video/i810/i810_main.h linux-2.6.27-710/drivers/video/i810/i810_main.h --- linux-2.6.27-720/drivers/video/i810/i810_main.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/drivers/video/i810/i810_main.h 2008-10-09 18:13:53.000000000 -0400 @@ -54,7 +54,7 @@ #ifdef CONFIG_X86 static inline void flush_cache(void) { - asm volatile ("wbinvd": : :"memory"); + asm volatile ("wbinvd":::"memory"); } #else #define flush_cache() do { } while(0) diff -Nurb linux-2.6.27-720/fs/file_table.c linux-2.6.27-710/fs/file_table.c --- linux-2.6.27-720/fs/file_table.c 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/fs/file_table.c 2009-05-04 12:15:11.000000000 -0400 @@ -34,8 +34,6 @@ /* public. Not pretty! */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); -EXPORT_SYMBOL(files_lock); - static struct percpu_counter nr_files __cacheline_aligned_in_smp; static inline void file_free_rcu(struct rcu_head *head) diff -Nurb linux-2.6.27-720/fs/super.c linux-2.6.27-710/fs/super.c --- linux-2.6.27-720/fs/super.c 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/fs/super.c 2009-05-04 12:15:12.000000000 -0400 @@ -48,8 +48,6 @@ LIST_HEAD(super_blocks); DEFINE_SPINLOCK(sb_lock); -EXPORT_SYMBOL(sb_lock); - /** * alloc_super - create new superblock * @type: filesystem type superblock should belong to diff -Nurb linux-2.6.27-720/include/asm-cris/arch-v10/io.h linux-2.6.27-710/include/asm-cris/arch-v10/io.h --- linux-2.6.27-720/include/asm-cris/arch-v10/io.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-cris/arch-v10/io.h 2008-10-09 18:13:53.000000000 -0400 @@ -190,8 +190,8 @@ ({ int _Foofoo; __asm__ volatile ("bmod [%0],%0" : "=r" (_Foofoo) : "0" \ (255)); _Foofoo; }) -#define TRACE_OFF() do { __asm__ volatile ("bmod [%0],%0" : : "r" (254)); } while (0) -#define SIM_END() do { __asm__ volatile ("bmod [%0],%0" : : "r" (28)); } while (0) +#define TRACE_OFF() do { __asm__ volatile ("bmod [%0],%0" :: "r" (254)); } while (0) +#define SIM_END() do { __asm__ volatile ("bmod [%0],%0" :: "r" (28)); } while (0) #define CRIS_CYCLES() __extension__ \ ({ unsigned long c; asm ("bmod [%1],%0" : "=r" (c) : "r" (27)); c;}) #endif /* ! 
defined CONFIG_SVINTO_SIM */ diff -Nurb linux-2.6.27-720/include/asm-cris/module.h linux-2.6.27-710/include/asm-cris/module.h --- linux-2.6.27-720/include/asm-cris/module.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-cris/module.h 2008-10-09 18:13:53.000000000 -0400 @@ -1,7 +1,7 @@ #ifndef _ASM_CRIS_MODULE_H #define _ASM_CRIS_MODULE_H /* cris is simple */ -EMPTY_STRUCT_DECL(mod_arch_specific); +struct mod_arch_specific { }; #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym diff -Nurb linux-2.6.27-720/include/asm-frv/bug.h linux-2.6.27-710/include/asm-frv/bug.h --- linux-2.6.27-720/include/asm-frv/bug.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/bug.h 2008-10-09 18:13:53.000000000 -0400 @@ -17,7 +17,7 @@ /* * Tell the user there is some problem. */ -asmlinkage void __debug_bug_trap(int signr); +extern asmlinkage void __debug_bug_trap(int signr); #ifdef CONFIG_NO_KERNEL_MSG #define _debug_bug_printk() diff -Nurb linux-2.6.27-720/include/asm-frv/fpu.h linux-2.6.27-710/include/asm-frv/fpu.h --- linux-2.6.27-720/include/asm-frv/fpu.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/fpu.h 2008-10-09 18:13:53.000000000 -0400 @@ -6,6 +6,6 @@ * MAX floating point unit state size (FSAVE/FRESTORE) */ -#define kernel_fpu_end() do { asm volatile("bar": : :"memory"); preempt_enable(); } while(0) +#define kernel_fpu_end() do { asm volatile("bar":::"memory"); preempt_enable(); } while(0) #endif /* __ASM_FPU_H */ diff -Nurb linux-2.6.27-720/include/asm-frv/gdb-stub.h linux-2.6.27-710/include/asm-frv/gdb-stub.h --- linux-2.6.27-720/include/asm-frv/gdb-stub.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/gdb-stub.h 2008-10-09 18:13:53.000000000 -0400 @@ -87,14 +87,14 @@ extern void gdbstub_tx_flush(void); extern void gdbstub_do_rx(void); -asmlinkage void __debug_stub_init_break(void); -asmlinkage void __break_hijack_kernel_event(void); -asmlinkage void __break_hijack_kernel_event_breaks_here(void); -asmlinkage void start_kernel(void); - -asmlinkage void gdbstub_rx_handler(void); -asmlinkage void gdbstub_rx_irq(void); -asmlinkage void gdbstub_intercept(void); +extern asmlinkage void __debug_stub_init_break(void); +extern asmlinkage void __break_hijack_kernel_event(void); +extern asmlinkage void __break_hijack_kernel_event_breaks_here(void); +extern asmlinkage void start_kernel(void); + +extern asmlinkage void gdbstub_rx_handler(void); +extern asmlinkage void gdbstub_rx_irq(void); +extern asmlinkage void gdbstub_intercept(void); extern uint32_t __entry_usertrap_table[]; extern uint32_t __entry_kerneltrap_table[]; diff -Nurb linux-2.6.27-720/include/asm-frv/highmem.h linux-2.6.27-710/include/asm-frv/highmem.h --- linux-2.6.27-720/include/asm-frv/highmem.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/highmem.h 2008-10-09 18:13:53.000000000 -0400 @@ -82,11 +82,11 @@ dampr = paddr | xAMPRx_L | xAMPRx_M | xAMPRx_S | xAMPRx_SS_16Kb | xAMPRx_V; \ \ if (type != __KM_CACHE) \ - asm volatile("movgs %0,dampr"#ampr : : "r"(dampr) : "memory"); \ + asm volatile("movgs %0,dampr"#ampr :: "r"(dampr) : "memory"); \ else \ asm volatile("movgs %0,iampr"#ampr"\n" \ "movgs %0,dampr"#ampr"\n" \ - : : "r"(dampr) : "memory" \ + :: "r"(dampr) : "memory" \ ); \ \ asm("movsg damlr"#ampr",%0" : "=r"(damlr)); \ @@ -140,9 +140,9 @@ #define __kunmap_atomic_primary(type, ampr) \ do { \ - asm volatile("movgs gr0,dampr"#ampr"\n" : : : "memory"); \ + asm volatile("movgs gr0,dampr"#ampr"\n" ::: "memory"); \ if 
(type == __KM_CACHE) \ - asm volatile("movgs gr0,iampr"#ampr"\n" : : : "memory");\ + asm volatile("movgs gr0,iampr"#ampr"\n" ::: "memory"); \ } while(0) #define __kunmap_atomic_secondary(slot, vaddr) \ diff -Nurb linux-2.6.27-720/include/asm-frv/module.h linux-2.6.27-710/include/asm-frv/module.h --- linux-2.6.27-720/include/asm-frv/module.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/module.h 2008-10-09 18:13:53.000000000 -0400 @@ -11,7 +11,9 @@ #ifndef _ASM_MODULE_H #define _ASM_MODULE_H -EMPTY_STRUCT_DECL(mod_arch_specific); +struct mod_arch_specific +{ +}; #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym diff -Nurb linux-2.6.27-720/include/asm-frv/pgtable.h linux-2.6.27-710/include/asm-frv/pgtable.h --- linux-2.6.27-720/include/asm-frv/pgtable.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/pgtable.h 2008-10-09 18:13:53.000000000 -0400 @@ -176,7 +176,7 @@ #define set_pte(pteptr, pteval) \ do { \ *(pteptr) = (pteval); \ - asm volatile("dcf %M0" : : "U"(*pteptr)); \ + asm volatile("dcf %M0" :: "U"(*pteptr)); \ } while(0) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) @@ -210,7 +210,7 @@ #define set_pgd(pgdptr, pgdval) \ do { \ memcpy((pgdptr), &(pgdval), sizeof(pgd_t)); \ - asm volatile("dcf %M0" : : "U"(*(pgdptr))); \ + asm volatile("dcf %M0" :: "U"(*(pgdptr))); \ } while(0) static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) diff -Nurb linux-2.6.27-720/include/asm-frv/processor.h linux-2.6.27-710/include/asm-frv/processor.h --- linux-2.6.27-720/include/asm-frv/processor.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/processor.h 2008-10-09 18:13:53.000000000 -0400 @@ -111,9 +111,9 @@ { } -asmlinkage int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); -asmlinkage void save_user_regs(struct user_context *target); -asmlinkage void *restore_user_regs(const struct user_context *target, ...); +extern asmlinkage int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); +extern asmlinkage void save_user_regs(struct user_context *target); +extern asmlinkage void *restore_user_regs(const struct user_context *target, ...); #define copy_segments(tsk, mm) do { } while (0) #define release_segments(mm) do { } while (0) diff -Nurb linux-2.6.27-720/include/asm-frv/spr-regs.h linux-2.6.27-710/include/asm-frv/spr-regs.h --- linux-2.6.27-720/include/asm-frv/spr-regs.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/spr-regs.h 2008-10-09 18:13:53.000000000 -0400 @@ -343,7 +343,7 @@ #define restore_dampr(R, _dampr) \ do { \ - asm volatile("movgs %0,dampr"R : : "r"(_dampr)); \ + asm volatile("movgs %0,dampr"R :: "r"(_dampr)); \ } while(0) /* diff -Nurb linux-2.6.27-720/include/asm-frv/system.h linux-2.6.27-710/include/asm-frv/system.h --- linux-2.6.27-720/include/asm-frv/system.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-frv/system.h 2008-10-09 18:13:53.000000000 -0400 @@ -23,7 +23,7 @@ * `prev' will never be the same as `next'. * The `mb' is to tell GCC not to cache `current' across this call. */ -asmlinkage +extern asmlinkage struct task_struct *__switch_to(struct thread_struct *prev_thread, struct thread_struct *next_thread, struct task_struct *prev); @@ -175,7 +175,7 @@ /* * Force strict CPU ordering. 
*/ -#define nop() asm volatile ("nop": :) +#define nop() asm volatile ("nop"::) #define mb() asm volatile ("membar" : : :"memory") #define rmb() asm volatile ("membar" : : :"memory") #define wmb() asm volatile ("membar" : : :"memory") diff -Nurb linux-2.6.27-720/include/asm-m32r/module.h linux-2.6.27-710/include/asm-m32r/module.h --- linux-2.6.27-720/include/asm-m32r/module.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-m32r/module.h 2008-10-09 18:13:53.000000000 -0400 @@ -1,7 +1,7 @@ #ifndef _ASM_M32R_MODULE_H #define _ASM_M32R_MODULE_H -EMPTY_STRUCT_DECL(mod_arch_specific); +struct mod_arch_specific { }; #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym diff -Nurb linux-2.6.27-720/include/asm-m68k/system.h linux-2.6.27-710/include/asm-m68k/system.h --- linux-2.6.27-720/include/asm-m68k/system.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-m68k/system.h 2008-10-09 18:13:53.000000000 -0400 @@ -167,23 +167,23 @@ #define __HAVE_ARCH_CMPXCHG 1 static inline unsigned long __cmpxchg(volatile void *p, unsigned long old, - unsigned long n, int size) + unsigned long new, int size) { switch (size) { case 1: __asm__ __volatile__ ("casb %0,%2,%1" : "=d" (old), "=m" (*(char *)p) - : "d" (n), "0" (old), "m" (*(char *)p)); + : "d" (new), "0" (old), "m" (*(char *)p)); break; case 2: __asm__ __volatile__ ("casw %0,%2,%1" : "=d" (old), "=m" (*(short *)p) - : "d" (n), "0" (old), "m" (*(short *)p)); + : "d" (new), "0" (old), "m" (*(short *)p)); break; case 4: __asm__ __volatile__ ("casl %0,%2,%1" : "=d" (old), "=m" (*(int *)p) - : "d" (n), "0" (old), "m" (*(int *)p)); + : "d" (new), "0" (old), "m" (*(int *)p)); break; } return old; diff -Nurb linux-2.6.27-720/include/asm-mips/fpu.h linux-2.6.27-710/include/asm-mips/fpu.h --- linux-2.6.27-720/include/asm-mips/fpu.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/fpu.h 2008-10-09 18:13:53.000000000 -0400 @@ -28,11 +28,11 @@ struct sigcontext; struct sigcontext32; -asmlinkage int (*save_fp_context)(struct sigcontext __user *sc); -asmlinkage int (*restore_fp_context)(struct sigcontext __user *sc); +extern asmlinkage int (*save_fp_context)(struct sigcontext __user *sc); +extern asmlinkage int (*restore_fp_context)(struct sigcontext __user *sc); -asmlinkage int (*save_fp_context32)(struct sigcontext32 __user *sc); -asmlinkage int (*restore_fp_context32)(struct sigcontext32 __user *sc); +extern asmlinkage int (*save_fp_context32)(struct sigcontext32 __user *sc); +extern asmlinkage int (*restore_fp_context32)(struct sigcontext32 __user *sc); extern void fpu_emulator_init_fpu(void); extern int fpu_emulator_save_context(struct sigcontext __user *sc); diff -Nurb linux-2.6.27-720/include/asm-mips/io.h linux-2.6.27-710/include/asm-mips/io.h --- linux-2.6.27-720/include/asm-mips/io.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/io.h 2008-10-09 18:13:53.000000000 -0400 @@ -303,7 +303,7 @@ volatile type *__mem; \ type __val; \ \ - __mem = (type *)__swizzle_addr_##bwlq((unsigned long)(mem)); \ + __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \ \ __val = pfx##ioswab##bwlq(__mem, val); \ \ @@ -336,7 +336,7 @@ volatile type *__mem; \ type __val; \ \ - __mem = (type *)__swizzle_addr_##bwlq((unsigned long)(mem)); \ + __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \ \ if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \ __val = *__mem; \ @@ -370,7 +370,7 @@ volatile type *__addr; \ type __val; \ \ - __addr = (type 
*)__swizzle_addr_##bwlq(mips_io_port_base + port); \ + __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \ \ __val = pfx##ioswab##bwlq(__addr, val); \ \ @@ -386,7 +386,7 @@ volatile type *__addr; \ type __val; \ \ - __addr = (type *)__swizzle_addr_##bwlq(mips_io_port_base + port); \ + __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \ \ BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long)); \ \ @@ -448,7 +448,7 @@ static inline void writes##bwlq(volatile void __iomem *mem, \ const void *addr, unsigned int count) \ { \ - const volatile type *__addr = (const type *) addr; \ + const volatile type *__addr = addr; \ \ while (count--) { \ __mem_write##bwlq(*__addr, mem); \ @@ -459,7 +459,7 @@ static inline void reads##bwlq(volatile void __iomem *mem, void *addr, \ unsigned int count) \ { \ - volatile type *__addr = (type *) addr; \ + volatile type *__addr = addr; \ \ while (count--) { \ *__addr = __mem_read##bwlq(mem); \ @@ -472,7 +472,7 @@ static inline void outs##bwlq(unsigned long port, const void *addr, \ unsigned int count) \ { \ - const volatile type *__addr = (const type *) addr; \ + const volatile type *__addr = addr; \ \ while (count--) { \ __mem_out##bwlq(*__addr, port); \ @@ -483,7 +483,7 @@ static inline void ins##bwlq(unsigned long port, void *addr, \ unsigned int count) \ { \ - volatile type *__addr = (type *) addr; \ + volatile type *__addr = addr; \ \ while (count--) { \ *__addr = __mem_in##bwlq(port); \ @@ -505,7 +505,7 @@ /* Depends on MIPS II instruction set */ -#define mmiowb() asm volatile ("sync" : : : "memory") +#define mmiowb() asm volatile ("sync" ::: "memory") static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff -Nurb linux-2.6.27-720/include/asm-mips/ip32/mace.h linux-2.6.27-710/include/asm-mips/ip32/mace.h --- linux-2.6.27-720/include/asm-mips/ip32/mace.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/ip32/mace.h 2008-10-09 18:13:53.000000000 -0400 @@ -308,9 +308,11 @@ */ /* Parallel port */ -EMPTY_STRUCT_DECL(mace_parallel); +struct mace_parallel { +}; -EMPTY_STRUCT_DECL(mace_ecp1284); /* later... */ +struct mace_ecp1284 { /* later... 
*/ +}; /* Serial port */ struct mace_serial { diff -Nurb linux-2.6.27-720/include/asm-mips/mips-boards/sim.h linux-2.6.27-710/include/asm-mips/mips-boards/sim.h --- linux-2.6.27-720/include/asm-mips/mips-boards/sim.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/mips-boards/sim.h 2008-10-09 18:13:53.000000000 -0400 @@ -31,7 +31,7 @@ ({ \ __asm__ __volatile__( \ "sltiu $0,$0, %0" \ - : :"i"(code) \ + ::"i"(code) \ ); \ }) diff -Nurb linux-2.6.27-720/include/asm-mips/mipsregs.h linux-2.6.27-710/include/asm-mips/mipsregs.h --- linux-2.6.27-720/include/asm-mips/mipsregs.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/mipsregs.h 2008-10-09 18:13:53.000000000 -0400 @@ -1052,15 +1052,15 @@ #define mfhi2() ({ long mfhi2; __asm__("mfhi %0, $ac2" : "=r" (mfhi2)); mfhi2;}) #define mfhi3() ({ long mfhi3; __asm__("mfhi %0, $ac3" : "=r" (mfhi3)); mfhi3;}) -#define mtlo0(x) __asm__("mtlo %0, $ac0" : :"r" (x)) -#define mtlo1(x) __asm__("mtlo %0, $ac1" : :"r" (x)) -#define mtlo2(x) __asm__("mtlo %0, $ac2" : :"r" (x)) -#define mtlo3(x) __asm__("mtlo %0, $ac3" : :"r" (x)) - -#define mthi0(x) __asm__("mthi %0, $ac0" : :"r" (x)) -#define mthi1(x) __asm__("mthi %0, $ac1" : :"r" (x)) -#define mthi2(x) __asm__("mthi %0, $ac2" : :"r" (x)) -#define mthi3(x) __asm__("mthi %0, $ac3" : :"r" (x)) +#define mtlo0(x) __asm__("mtlo %0, $ac0" ::"r" (x)) +#define mtlo1(x) __asm__("mtlo %0, $ac1" ::"r" (x)) +#define mtlo2(x) __asm__("mtlo %0, $ac2" ::"r" (x)) +#define mtlo3(x) __asm__("mtlo %0, $ac3" ::"r" (x)) + +#define mthi0(x) __asm__("mthi %0, $ac0" ::"r" (x)) +#define mthi1(x) __asm__("mthi %0, $ac1" ::"r" (x)) +#define mthi2(x) __asm__("mthi %0, $ac2" ::"r" (x)) +#define mthi3(x) __asm__("mthi %0, $ac3" ::"r" (x)) #else @@ -1390,13 +1390,13 @@ } \ \ static inline unsigned int \ -change_c0_##name(unsigned int change, unsigned int newval) \ +change_c0_##name(unsigned int change, unsigned int new) \ { \ unsigned int res; \ \ res = read_c0_##name(); \ res &= ~change; \ - res |= (newval & change); \ + res |= (new & change); \ write_c0_##name(res); \ \ return res; \ diff -Nurb linux-2.6.27-720/include/asm-mips/paccess.h linux-2.6.27-710/include/asm-mips/paccess.h --- linux-2.6.27-720/include/asm-mips/paccess.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/paccess.h 2008-10-09 18:13:53.000000000 -0400 @@ -22,8 +22,8 @@ #define __PA_ADDR ".dword" #endif -asmlinkage void handle_ibe(void); -asmlinkage void handle_dbe(void); +extern asmlinkage void handle_ibe(void); +extern asmlinkage void handle_dbe(void); #define put_dbe(x, ptr) __put_dbe((x), (ptr), sizeof(*(ptr))) #define get_dbe(x, ptr) __get_dbe((x), (ptr), sizeof(*(ptr))) diff -Nurb linux-2.6.27-720/include/asm-mips/processor.h linux-2.6.27-710/include/asm-mips/processor.h --- linux-2.6.27-720/include/asm-mips/processor.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/processor.h 2008-10-09 18:13:53.000000000 -0400 @@ -242,7 +242,7 @@ * overhead of a function call by forcing the compiler to save the return * address register on the stack. 
*/ -#define return_address() ({__asm__ __volatile__("": : :"$31");__builtin_return_address(0);}) +#define return_address() ({__asm__ __volatile__("":::"$31");__builtin_return_address(0);}) #ifdef CONFIG_CPU_HAS_PREFETCH diff -Nurb linux-2.6.27-720/include/asm-mips/ptrace.h linux-2.6.27-710/include/asm-mips/ptrace.h --- linux-2.6.27-720/include/asm-mips/ptrace.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/ptrace.h 2008-10-09 18:13:53.000000000 -0400 @@ -84,7 +84,7 @@ #define instruction_pointer(regs) ((regs)->cp0_epc) #define profile_pc(regs) instruction_pointer(regs) -asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit); +extern asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit); extern NORET_TYPE void die(const char *, const struct pt_regs *) ATTRIB_NORET; diff -Nurb linux-2.6.27-720/include/asm-mips/smp.h linux-2.6.27-710/include/asm-mips/smp.h --- linux-2.6.27-720/include/asm-mips/smp.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/smp.h 2008-10-09 18:13:53.000000000 -0400 @@ -55,7 +55,7 @@ mp_ops->send_ipi_single(cpu, SMP_RESCHEDULE_YOURSELF); } -asmlinkage void smp_call_function_interrupt(void); +extern asmlinkage void smp_call_function_interrupt(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi(cpumask_t mask); diff -Nurb linux-2.6.27-720/include/asm-mips/system.h linux-2.6.27-710/include/asm-mips/system.h --- linux-2.6.27-720/include/asm-mips/system.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-mips/system.h 2008-10-09 18:13:53.000000000 -0400 @@ -27,7 +27,7 @@ * switch_to(n) should switch tasks to task nr n, first * checking that n isn't the current task, in which case it does nothing. */ -asmlinkage void *resume(void *last, void *next, void *next_ti); +extern asmlinkage void *resume(void *last, void *next, void *next_ti); struct task_struct; @@ -189,9 +189,9 @@ { switch (size) { case 4: - return __xchg_u32((volatile int *) ptr, x); + return __xchg_u32(ptr, x); case 8: - return __xchg_u64((volatile __u64 *) ptr, x); + return __xchg_u64(ptr, x); } __xchg_called_with_bad_pointer(); return x; diff -Nurb linux-2.6.27-720/include/asm-parisc/system.h linux-2.6.27-710/include/asm-parisc/system.h --- linux-2.6.27-720/include/asm-parisc/system.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-parisc/system.h 2008-10-09 18:13:53.000000000 -0400 @@ -122,7 +122,7 @@ ** The __asm__ op below simple prevents gcc/ld from reordering ** instructions across the mb() "call". 
*/ -#define mb() __asm__ __volatile__("": : :"memory") /* barrier() */ +#define mb() __asm__ __volatile__("":::"memory") /* barrier() */ #define rmb() mb() #define wmb() mb() #define smp_mb() mb() diff -Nurb linux-2.6.27-720/include/asm-um/module-i386.h linux-2.6.27-710/include/asm-um/module-i386.h --- linux-2.6.27-720/include/asm-um/module-i386.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-um/module-i386.h 2008-10-09 18:13:53.000000000 -0400 @@ -2,7 +2,9 @@ #define __UM_MODULE_I386_H /* UML is simple */ -EMPTY_STRUCT_DECL(mod_arch_specific); +struct mod_arch_specific +{ +}; #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym diff -Nurb linux-2.6.27-720/include/asm-um/module-x86_64.h linux-2.6.27-710/include/asm-um/module-x86_64.h --- linux-2.6.27-720/include/asm-um/module-x86_64.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-um/module-x86_64.h 2008-10-09 18:13:53.000000000 -0400 @@ -8,7 +8,9 @@ #define __UM_MODULE_X86_64_H /* UML is simple */ -EMPTY_STRUCT_DECL(mod_arch_specific); +struct mod_arch_specific +{ +}; #define Elf_Shdr Elf64_Shdr #define Elf_Sym Elf64_Sym diff -Nurb linux-2.6.27-720/include/asm-x86/bitops.h linux-2.6.27-710/include/asm-x86/bitops.h --- linux-2.6.27-720/include/asm-x86/bitops.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/bitops.h 2008-10-09 18:13:53.000000000 -0400 @@ -35,7 +35,7 @@ * a mask operation on a byte. */ #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((char *)(addr) + ((nr)>>3)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) /** diff -Nurb linux-2.6.27-720/include/asm-x86/desc.h linux-2.6.27-710/include/asm-x86/desc.h --- linux-2.6.27-720/include/asm-x86/desc.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/desc.h 2008-10-09 18:13:53.000000000 -0400 @@ -70,7 +70,7 @@ static inline int desc_empty(const void *ptr) { - const u32 *desc = (const u32 *) ptr; + const u32 *desc = ptr; return !(desc[0] | desc[1]); } diff -Nurb linux-2.6.27-720/include/asm-x86/hw_irq.h linux-2.6.27-710/include/asm-x86/hw_irq.h --- linux-2.6.27-720/include/asm-x86/hw_irq.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/hw_irq.h 2008-10-09 18:13:53.000000000 -0400 @@ -84,16 +84,6 @@ extern void eisa_set_level_irq(unsigned int irq); /* Voyager functions */ -#ifdef __cplusplus -asmlinkage void vic_cpi_interrupt(void); -asmlinkage void vic_sys_interrupt(void); -asmlinkage void vic_cmn_interrupt(void); -asmlinkage void qic_timer_interrupt(void); -asmlinkage void qic_invalidate_interrupt(void); -asmlinkage void qic_reschedule_interrupt(void); -asmlinkage void qic_enable_irq_interrupt(void); -asmlinkage void qic_call_function_interrupt(void); -#else extern asmlinkage void vic_cpi_interrupt(void); extern asmlinkage void vic_sys_interrupt(void); extern asmlinkage void vic_cmn_interrupt(void); @@ -103,8 +93,6 @@ extern asmlinkage void qic_enable_irq_interrupt(void); extern asmlinkage void qic_call_function_interrupt(void); -#endif - #ifdef CONFIG_X86_32 extern void (*const interrupt[NR_IRQS])(void); #else diff -Nurb linux-2.6.27-720/include/asm-x86/page.h linux-2.6.27-710/include/asm-x86/page.h --- linux-2.6.27-720/include/asm-x86/page.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/page.h 2008-10-09 18:13:53.000000000 -0400 @@ -138,7 +138,7 @@ static inline pte_t native_make_pte(pteval_t val) { - return (pte_t) 
({pte_t duh; duh.pte = val;duh;}); + return (pte_t) { .pte = val }; } static inline pteval_t native_pte_val(pte_t pte) diff -Nurb linux-2.6.27-720/include/asm-x86/paravirt.h linux-2.6.27-710/include/asm-x86/paravirt.h --- linux-2.6.27-720/include/asm-x86/paravirt.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/paravirt.h 2008-10-09 18:13:53.000000000 -0400 @@ -1060,7 +1060,7 @@ pv_mmu_ops.make_pte, val); - return (pte_t) ({pte_t duh; duh.pte = ret;duh;}); + return (pte_t) { .pte = ret }; } static inline pteval_t pte_val(pte_t pte) @@ -1131,7 +1131,7 @@ ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, mm, addr, ptep); - return (pte_t) ({pte_t duh; duh.pte = ret;duh;}); + return (pte_t) { .pte = ret }; } static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, @@ -1314,12 +1314,10 @@ set_pte_at(mm, addr, ptep, __pte(0)); } -/* static inline void pmd_clear(pmd_t *pmdp) { set_pmd(pmdp, __pmd(0)); } -*/ #endif /* CONFIG_X86_PAE */ /* Lazy mode for batching updates / context switch */ diff -Nurb linux-2.6.27-720/include/asm-x86/pgtable-2level.h linux-2.6.27-710/include/asm-x86/pgtable-2level.h --- linux-2.6.27-720/include/asm-x86/pgtable-2level.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/pgtable-2level.h 2008-10-09 18:13:53.000000000 -0400 @@ -33,12 +33,10 @@ native_set_pte(ptep, pte); } -/* static inline void native_pmd_clear(pmd_t *pmdp) { native_set_pmd(pmdp, __pmd(0)); } -*/ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) diff -Nurb linux-2.6.27-720/include/asm-x86/pgtable.h linux-2.6.27-710/include/asm-x86/pgtable.h --- linux-2.6.27-720/include/asm-x86/pgtable.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/pgtable.h 2008-10-09 18:13:53.000000000 -0400 @@ -260,13 +260,11 @@ pgprot_val(pgprot)) & __supported_pte_mask); } -/* static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } -*/ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { diff -Nurb linux-2.6.27-720/include/asm-x86/spinlock_types.h linux-2.6.27-710/include/asm-x86/spinlock_types.h --- linux-2.6.27-720/include/asm-x86/spinlock_types.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/spinlock_types.h 2008-10-09 18:13:53.000000000 -0400 @@ -9,19 +9,12 @@ unsigned int slock; } raw_spinlock_t; -#ifndef __cplusplus #define __RAW_SPIN_LOCK_UNLOCKED { 0 } -#else -#define __RAW_SPIN_LOCK_UNLOCKED ({raw_spinlock_t duh; duh.slock=0;duh;}) -#endif typedef struct { unsigned int lock; } raw_rwlock_t; -#ifndef __cplusplus #define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } -#else -#define __RAW_RW_LOCK_UNLOCKED ({raw_rwlock_t duh;duh.lock=RW_LOCK_BIAS;duh;}) -#endif + #endif diff -Nurb linux-2.6.27-720/include/asm-x86/vdso.h linux-2.6.27-710/include/asm-x86/vdso.h --- linux-2.6.27-720/include/asm-x86/vdso.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-x86/vdso.h 2008-10-09 18:13:53.000000000 -0400 @@ -33,8 +33,8 @@ * These symbols are defined with the addresses in the vsyscall page. * See vsyscall-sigreturn.S. 
*/ -extern void* __user __kernel_sigreturn; -extern void* __user __kernel_rt_sigreturn; +extern void __user __kernel_sigreturn; +extern void __user __kernel_rt_sigreturn; /* * These symbols are defined by vdso32.S to mark the bounds diff -Nurb linux-2.6.27-720/include/asm-xtensa/module.h linux-2.6.27-710/include/asm-xtensa/module.h --- linux-2.6.27-720/include/asm-xtensa/module.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-xtensa/module.h 2008-10-09 18:13:53.000000000 -0400 @@ -13,7 +13,10 @@ #ifndef _XTENSA_MODULE_H #define _XTENSA_MODULE_H -EMPTY_STRUCT_DECL(mod_arch_specific); +struct mod_arch_specific +{ + /* No special elements, yet. */ +}; #define MODULE_ARCH_VERMAGIC "xtensa-" __stringify(XCHAL_CORE_ID) " " diff -Nurb linux-2.6.27-720/include/asm-xtensa/processor.h linux-2.6.27-710/include/asm-xtensa/processor.h --- linux-2.6.27-720/include/asm-xtensa/processor.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-xtensa/processor.h 2008-10-09 18:13:53.000000000 -0400 @@ -183,7 +183,7 @@ /* Special register access. */ -#define WSR(v,sr) __asm__ __volatile__ ("wsr %0,"__stringify(sr) : : "a"(v)); +#define WSR(v,sr) __asm__ __volatile__ ("wsr %0,"__stringify(sr) :: "a"(v)); #define RSR(v,sr) __asm__ __volatile__ ("rsr %0,"__stringify(sr) : "=a"(v)); #define set_sr(x,sr) ({unsigned int v=(unsigned int)x; WSR(v,sr);}) diff -Nurb linux-2.6.27-720/include/asm-xtensa/system.h linux-2.6.27-710/include/asm-xtensa/system.h --- linux-2.6.27-720/include/asm-xtensa/system.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/asm-xtensa/system.h 2008-10-09 18:13:53.000000000 -0400 @@ -21,21 +21,21 @@ __asm__ __volatile__ ("rsr %0,"__stringify(PS) : "=a" (x)); #define local_irq_restore(x) do { \ __asm__ __volatile__ ("wsr %0, "__stringify(PS)" ; rsync" \ - : : "a" (x) : "memory"); } while(0); + :: "a" (x) : "memory"); } while(0); #define local_irq_save(x) do { \ __asm__ __volatile__ ("rsil %0, "__stringify(LOCKLEVEL) \ - : "=a" (x) : : "memory");} while(0); + : "=a" (x) :: "memory");} while(0); static inline void local_irq_disable(void) { unsigned long flags; __asm__ __volatile__ ("rsil %0, "__stringify(LOCKLEVEL) - : "=a" (flags) : : "memory"); + : "=a" (flags) :: "memory"); } static inline void local_irq_enable(void) { unsigned long flags; - __asm__ __volatile__ ("rsil %0, 0" : "=a" (flags) : : "memory"); + __asm__ __volatile__ ("rsil %0, 0" : "=a" (flags) :: "memory"); } diff -Nurb linux-2.6.27-720/include/linux/backing-dev.h linux-2.6.27-710/include/linux/backing-dev.h --- linux-2.6.27-720/include/linux/backing-dev.h 2009-05-04 12:18:33.000000000 -0400 +++ linux-2.6.27-710/include/linux/backing-dev.h 2008-10-09 18:13:53.000000000 -0400 @@ -29,7 +29,7 @@ BDI_unused, /* Available bits start here */ }; -typedef int (congested_fn_t)(void *, int); +typedef int (congested_fn)(void *, int); enum bdi_stat_item { BDI_RECLAIMABLE, @@ -43,7 +43,7 @@ unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ unsigned long state; /* Always use atomic bitops on this */ unsigned int capabilities; /* Device capabilities */ - congested_fn_t *congested_fn; /* Function pointer if device is md/dm */ + congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ void (*unplug_io_fn)(struct backing_dev_info *, struct page *); void *unplug_io_data; diff -Nurb linux-2.6.27-720/include/linux/compat.h linux-2.6.27-710/include/linux/compat.h --- 
linux-2.6.27-720/include/linux/compat.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/compat.h 2009-05-04 12:15:01.000000000 -0400 @@ -115,7 +115,7 @@ struct compat_siginfo; -asmlinkage long compat_sys_waitid(int, compat_pid_t, +extern asmlinkage long compat_sys_waitid(int, compat_pid_t, struct compat_siginfo __user *, int, struct compat_rusage __user *); diff -Nurb linux-2.6.27-720/include/linux/dmaengine.h linux-2.6.27-710/include/linux/dmaengine.h --- linux-2.6.27-720/include/linux/dmaengine.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/dmaengine.h 2008-10-09 18:13:53.000000000 -0400 @@ -377,7 +377,7 @@ static inline void async_tx_ack(struct dma_async_tx_descriptor *tx) { - tx->flags = (enum dma_ctrl_flags) (tx->flags | DMA_CTRL_ACK); + tx->flags |= DMA_CTRL_ACK; } static inline bool async_tx_test_ack(struct dma_async_tx_descriptor *tx) diff -Nurb linux-2.6.27-720/include/linux/dqblk_v1.h linux-2.6.27-710/include/linux/dqblk_v1.h --- linux-2.6.27-720/include/linux/dqblk_v1.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/dqblk_v1.h 2008-10-09 18:13:53.000000000 -0400 @@ -18,6 +18,7 @@ #define V1_DEL_REWRITE 2 /* Special information about quotafile */ -EMPTY_STRUCT_DECL(v1_mem_dqinfo); +struct v1_mem_dqinfo { +}; #endif /* _LINUX_DQBLK_V1_H */ diff -Nurb linux-2.6.27-720/include/linux/fs.h linux-2.6.27-710/include/linux/fs.h --- linux-2.6.27-720/include/linux/fs.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/fs.h 2009-05-04 12:16:10.000000000 -0400 @@ -2111,7 +2111,7 @@ static inline void simple_transaction_set(struct file *file, size_t n) { - struct simple_transaction_argresp *ar = (struct simple_transaction_argresp *) file->private_data; + struct simple_transaction_argresp *ar = file->private_data; BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); diff -Nurb linux-2.6.27-720/include/linux/fs.h.orig linux-2.6.27-710/include/linux/fs.h.orig --- linux-2.6.27-720/include/linux/fs.h.orig 2009-05-04 12:16:10.000000000 -0400 +++ linux-2.6.27-710/include/linux/fs.h.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,2199 +0,0 @@ -#ifndef _LINUX_FS_H -#define _LINUX_FS_H - -/* - * This file has definitions for some important file table - * structures etc. - */ - -#include -#include - -/* - * It's silly to have NR_OPEN bigger than NR_FILE, but you can change - * the file limit at runtime and only root can increase the per-process - * nr_file rlimit, so it's safe to set up a ridiculously high absolute - * upper limit on files-per-process. - * - * Some programs (notably those using select()) may have to be - * recompiled to take full advantage of the new limits.. 
- */ - -/* Fixed constants first: */ -#undef NR_OPEN -extern int sysctl_nr_open; -#define INR_OPEN 4096 /* Initial setting for nfile rlimits */ - -#define BLOCK_SIZE_BITS 10 -#define BLOCK_SIZE (1<i_sb->s_flags & (flg)) - -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) -#define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ - ((inode)->i_flags & S_SYNC)) -#define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ - ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) -#define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) -#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) -#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) -#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED) - -#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) -#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) -#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) -#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK) -#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) -#define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) - -#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) -#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) -#define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) -#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) - -#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER)) - -#ifdef CONFIG_VSERVER_COWBL -# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode)) -# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1)) -#else -# define IS_COW(inode) (0) -# define IS_COW_LINK(inode) (0) -#endif - -/* the read-only stuff doesn't really belong here, but any other place is - probably as bad and I don't want to create yet another include file. */ - -#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ -#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ -#define BLKRRPART _IO(0x12,95) /* re-read partition table */ -#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ -#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ -#define BLKRASET _IO(0x12,98) /* set read ahead for block device */ -#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ -#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ -#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ -#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ -#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ -#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ -#if 0 -#define BLKPG _IO(0x12,105)/* See blkpg.h */ - -/* Some people are morons. Do not use sizeof! */ - -#define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ -#define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ -/* This was here just to show that the number is taken - - probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ -#endif -/* A jump here: 108-111 have been used for various private purposes. 
*/ -#define BLKBSZGET _IOR(0x12,112,size_t) -#define BLKBSZSET _IOW(0x12,113,size_t) -#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ -#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) -#define BLKTRACESTART _IO(0x12,116) -#define BLKTRACESTOP _IO(0x12,117) -#define BLKTRACETEARDOWN _IO(0x12,118) - -#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ -#define FIBMAP _IO(0x00,1) /* bmap access */ -#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ - -#define FS_IOC_GETFLAGS _IOR('f', 1, long) -#define FS_IOC_SETFLAGS _IOW('f', 2, long) -#define FS_IOC_GETVERSION _IOR('v', 1, long) -#define FS_IOC_SETVERSION _IOW('v', 2, long) -#define FS_IOC32_GETFLAGS _IOR('f', 1, int) -#define FS_IOC32_SETFLAGS _IOW('f', 2, int) -#define FS_IOC32_GETVERSION _IOR('v', 1, int) -#define FS_IOC32_SETVERSION _IOW('v', 2, int) - -/* - * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) - */ -#define FS_SECRM_FL 0x00000001 /* Secure deletion */ -#define FS_UNRM_FL 0x00000002 /* Undelete */ -#define FS_COMPR_FL 0x00000004 /* Compress file */ -#define FS_SYNC_FL 0x00000008 /* Synchronous updates */ -#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define FS_APPEND_FL 0x00000020 /* writes to file may only append */ -#define FS_NODUMP_FL 0x00000040 /* do not dump file */ -#define FS_NOATIME_FL 0x00000080 /* do not update atime */ -/* Reserved for compression usage... */ -#define FS_DIRTY_FL 0x00000100 -#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ -#define FS_ECOMPR_FL 0x00000800 /* Compression error */ -/* End compression flags --- maybe not all used */ -#define FS_BTREE_FL 0x00001000 /* btree format dir */ -#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ -#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ -#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define FS_EXTENT_FL 0x00080000 /* Extents */ -#define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ -#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ -#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ - -#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ -#define FS_COW_FL 0x20000000 /* Copy on Write marker */ - -#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */ -#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */ - -#define SYNC_FILE_RANGE_WAIT_BEFORE 1 -#define SYNC_FILE_RANGE_WRITE 2 -#define SYNC_FILE_RANGE_WAIT_AFTER 4 - -#ifdef __KERNEL__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -struct export_operations; -struct hd_geometry; -struct iovec; -struct nameidata; -struct kiocb; -struct pipe_inode_info; -struct poll_table_struct; -struct kstatfs; -struct vm_area_struct; -struct vfsmount; - -extern void __init inode_init(void); -extern void __init inode_init_early(void); -extern void __init files_init(unsigned long); - -struct buffer_head; -typedef int (get_block_t)(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create); -typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, - ssize_t bytes, void 
*private); - -/* - * Attribute flags. These should be or-ed together to figure out what - * has been changed! - */ -#define ATTR_MODE (1 << 0) -#define ATTR_UID (1 << 1) -#define ATTR_GID (1 << 2) -#define ATTR_SIZE (1 << 3) -#define ATTR_ATIME (1 << 4) -#define ATTR_MTIME (1 << 5) -#define ATTR_CTIME (1 << 6) -#define ATTR_ATIME_SET (1 << 7) -#define ATTR_MTIME_SET (1 << 8) -#define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG (1 << 10) -#define ATTR_KILL_SUID (1 << 11) -#define ATTR_KILL_SGID (1 << 12) -#define ATTR_FILE (1 << 13) -#define ATTR_KILL_PRIV (1 << 14) -#define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ -#define ATTR_TIMES_SET (1 << 16) -#define ATTR_TAG (1 << 17) - -/* - * This is the Inode Attributes structure, used for notify_change(). It - * uses the above definitions as flags, to know which values have changed. - * Also, in this manner, a Filesystem can look at only the values it cares - * about. Basically, these are the attributes that the VFS layer can - * request to change from the FS layer. - * - * Derek Atkins 94-10-20 - */ -struct iattr { - unsigned int ia_valid; - umode_t ia_mode; - uid_t ia_uid; - gid_t ia_gid; - tag_t ia_tag; - loff_t ia_size; - struct timespec ia_atime; - struct timespec ia_mtime; - struct timespec ia_ctime; - - /* - * Not an attribute, but an auxilary info for filesystems wanting to - * implement an ftruncate() like method. NOTE: filesystem should - * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). - */ - struct file *ia_file; -}; - -#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ -#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */ - -/* - * Includes for diskquotas. - */ -#include - -/** - * enum positive_aop_returns - aop return codes with specific semantics - * - * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has - * completed, that the page is still locked, and - * should be considered active. The VM uses this hint - * to return the page to the active list -- it won't - * be a candidate for writeback again in the near - * future. Other callers must be careful to unlock - * the page if they get this return. Returned by - * writepage(); - * - * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has - * unlocked it and the page might have been truncated. - * The caller should back up to acquiring a new page and - * trying again. The aop will be taking reasonable - * precautions not to livelock. If the caller held a page - * reference, it should drop it before retrying. Returned - * by readpage(). - * - * address_space_operation functions return these large constants to indicate - * special semantics to the caller. These are much larger than the bytes in a - * page to allow for functions that return the number of bytes operated on in a - * given page. - */ - -enum positive_aop_returns { - AOP_WRITEPAGE_ACTIVATE = 0x80000, - AOP_TRUNCATED_PAGE = 0x80001, -}; - -#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ -#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ -#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct - * helper code (eg buffer layer) - * to clear GFP_FS from alloc */ - -/* - * oh the beauties of C type declarations. 
- */ -struct page; -struct address_space; -struct writeback_control; - -struct iov_iter { - const struct iovec *iov; - unsigned long nr_segs; - size_t iov_offset; - size_t count; -}; - -size_t iov_iter_copy_from_user_atomic(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -size_t iov_iter_copy_from_user(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -void iov_iter_advance(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -size_t iov_iter_single_seg_count(struct iov_iter *i); - -static inline void iov_iter_init(struct iov_iter *i, - const struct iovec *iov, unsigned long nr_segs, - size_t count, size_t written) -{ - i->iov = iov; - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count + written; - - iov_iter_advance(i, written); -} - -static inline size_t iov_iter_count(struct iov_iter *i) -{ - return i->count; -} - -/* - * "descriptor" for what we're up to with a read. - * This allows us to use the same read code yet - * have multiple different users of the data that - * we read from a file. - * - * The simplest case just copies the data to user - * mode. - */ -typedef struct { - size_t written; - size_t count; - union { - char __user *buf; - void *data; - } arg; - int error; -} read_descriptor_t; - -typedef int (*read_actor_t)(read_descriptor_t *, struct page *, - unsigned long, unsigned long); - -struct address_space_operations { - int (*writepage)(struct page *page, struct writeback_control *wbc); - int (*readpage)(struct file *, struct page *); - void (*sync_page)(struct page *); - - /* Write back some dirty pages from this mapping. */ - int (*writepages)(struct address_space *, struct writeback_control *); - - /* Set a page dirty. Return true if this dirtied it */ - int (*set_page_dirty)(struct page *page); - - int (*readpages)(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages); - - /* - * ext3 requires that a successful prepare_write() call be followed - * by a commit_write() call - they must be balanced - */ - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); - - int (*write_begin)(struct file *, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); - int (*write_end)(struct file *, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); - - /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ - sector_t (*bmap)(struct address_space *, sector_t); - void (*invalidatepage) (struct page *, unsigned long); - int (*releasepage) (struct page *, gfp_t); - ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); - int (*get_xip_mem)(struct address_space *, pgoff_t, int, - void **, unsigned long *); - /* migrate the contents of a page to the specified target */ - int (*migratepage) (struct address_space *, - struct page *, struct page *); - int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, - unsigned long); -}; - -/* - * pagecache_write_begin/pagecache_write_end must be used by general code - * to write into the pagecache. 
- */ -int pagecache_write_begin(struct file *, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); - -int pagecache_write_end(struct file *, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); - -struct backing_dev_info; -struct address_space { - struct inode *host; /* owner: inode, block_device */ - struct radix_tree_root page_tree; /* radix tree of all pages */ - spinlock_t tree_lock; /* and lock protecting it */ - unsigned int i_mmap_writable;/* count VM_SHARED mappings */ - struct prio_tree_root i_mmap; /* tree of private and shared mappings */ - struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ - spinlock_t i_mmap_lock; /* protect tree, count, list */ - unsigned int truncate_count; /* Cover race condition with truncate */ - unsigned long nrpages; /* number of total pages */ - pgoff_t writeback_index;/* writeback starts here */ - const struct address_space_operations *a_ops; /* methods */ - unsigned long flags; /* error bits/gfp mask */ - struct backing_dev_info *backing_dev_info; /* device readahead, etc */ - spinlock_t private_lock; /* for use by the address_space */ - struct list_head private_list; /* ditto */ - struct address_space *assoc_mapping; /* ditto */ -} __attribute__((aligned(sizeof(long)))); - /* - * On most architectures that alignment is already the case; but - * must be enforced here for CRIS, to let the least signficant bit - * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. - */ - -struct block_device { - dev_t bd_dev; /* not a kdev_t - it's a search key */ - struct inode * bd_inode; /* will die */ - int bd_openers; - struct mutex bd_mutex; /* open/close mutex */ - struct semaphore bd_mount_sem; - struct list_head bd_inodes; - void * bd_holder; - int bd_holders; -#ifdef CONFIG_SYSFS - struct list_head bd_holder_list; -#endif - struct block_device * bd_contains; - unsigned bd_block_size; - struct hd_struct * bd_part; - /* number of times partitions within this device have been opened. */ - unsigned bd_part_count; - int bd_invalidated; - struct gendisk * bd_disk; - struct list_head bd_list; - struct backing_dev_info *bd_inode_backing_dev_info; - /* - * Private data. You must have bd_claim'ed the block_device - * to use this. NOTE: bd_claim allows an owner to claim - * the same device multiple times, the owner must take special - * care to not mess up bd_private for that case. - */ - unsigned long bd_private; -}; - -/* - * Radix-tree tags, for tagging dirty and writeback pages within the pagecache - * radix trees - */ -#define PAGECACHE_TAG_DIRTY 0 -#define PAGECACHE_TAG_WRITEBACK 1 - -int mapping_tagged(struct address_space *mapping, int tag); - -/* - * Might pages of this file be mapped into userspace? - */ -static inline int mapping_mapped(struct address_space *mapping) -{ - return !prio_tree_empty(&mapping->i_mmap) || - !list_empty(&mapping->i_mmap_nonlinear); -} - -/* - * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff - * marks vma as VM_SHARED if it is shared, and the file was opened for - * writing i.e. vma may be mprotected writable even if now readonly. - */ -static inline int mapping_writably_mapped(struct address_space *mapping) -{ - return mapping->i_mmap_writable != 0; -} - -/* - * Use sequence counter to get consistent i_size on 32-bit processors. 
- */ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) -#include -#define __NEED_I_SIZE_ORDERED -#define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) -#else -#define i_size_ordered_init(inode) do { } while (0) -#endif - -struct inode { - struct hlist_node i_hash; - struct list_head i_list; - struct list_head i_sb_list; - struct list_head i_dentry; - unsigned long i_ino; - atomic_t i_count; - unsigned int i_nlink; - uid_t i_uid; - gid_t i_gid; - tag_t i_tag; - dev_t i_rdev; - dev_t i_mdev; - u64 i_version; - loff_t i_size; -#ifdef __NEED_I_SIZE_ORDERED - seqcount_t i_size_seqcount; -#endif - struct timespec i_atime; - struct timespec i_mtime; - struct timespec i_ctime; - unsigned int i_blkbits; - blkcnt_t i_blocks; - unsigned short i_bytes; - umode_t i_mode; - spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ - struct mutex i_mutex; - struct rw_semaphore i_alloc_sem; - const struct inode_operations *i_op; - const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct super_block *i_sb; - struct file_lock *i_flock; - struct address_space *i_mapping; - struct address_space i_data; -#ifdef CONFIG_QUOTA - struct dquot *i_dquot[MAXQUOTAS]; -#endif - struct list_head i_devices; - union { - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct cdev *i_cdev; - }; - int i_cindex; - - __u32 i_generation; - -#ifdef CONFIG_DNOTIFY - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -#endif - -#ifdef CONFIG_INOTIFY - struct list_head inotify_watches; /* watches on this inode */ - struct mutex inotify_mutex; /* protects the watches list */ -#endif - - unsigned long i_state; - unsigned long dirtied_when; /* jiffies of first dirtying */ - - unsigned short i_flags; - unsigned short i_vflags; - - atomic_t i_writecount; -#ifdef CONFIG_SECURITY - void *i_security; -#endif - void *i_private; /* fs or device private pointer */ -}; - -/* - * inode->i_mutex nesting subclasses for the lock validator: - * - * 0: the object of the current VFS operation - * 1: parent - * 2: child/target - * 3: quota file - * - * The locking order between these classes is - * parent -> child -> normal -> xattr -> quota - */ -enum inode_i_mutex_lock_class -{ - I_MUTEX_NORMAL, - I_MUTEX_PARENT, - I_MUTEX_CHILD, - I_MUTEX_XATTR, - I_MUTEX_QUOTA -}; - -extern void inode_double_lock(struct inode *inode1, struct inode *inode2); -extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); - -/* - * NOTE: in a 32bit arch with a preemptable kernel and - * an UP compile the i_size_read/write must be atomic - * with respect to the local cpu (unlike with preempt disabled), - * but they don't need to be atomic with respect to other cpus like in - * true SMP (so they need either to either locally disable irq around - * the read or for example on x86 they can be still implemented as a - * cmpxchg8b without the need of the lock prefix). For SMP compiles - * and 64bit archs it makes no difference if preempt is enabled or not. 
- */ -static inline loff_t i_size_read(const struct inode *inode) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - loff_t i_size; - unsigned int seq; - - do { - seq = read_seqcount_begin(&inode->i_size_seqcount); - i_size = inode->i_size; - } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); - return i_size; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) - loff_t i_size; - - preempt_disable(); - i_size = inode->i_size; - preempt_enable(); - return i_size; -#else - return inode->i_size; -#endif -} - -/* - * NOTE: unlike i_size_read(), i_size_write() does need locking around it - * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount - * can be lost, resulting in subsequent i_size_read() calls spinning forever. - */ -static inline void i_size_write(struct inode *inode, loff_t i_size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - write_seqcount_begin(&inode->i_size_seqcount); - inode->i_size = i_size; - write_seqcount_end(&inode->i_size_seqcount); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) - preempt_disable(); - inode->i_size = i_size; - preempt_enable(); -#else - inode->i_size = i_size; -#endif -} - -static inline unsigned iminor(const struct inode *inode) -{ - return MINOR(inode->i_mdev); -} - -static inline unsigned imajor(const struct inode *inode) -{ - return MAJOR(inode->i_mdev); -} - -extern struct block_device *I_BDEV(struct inode *inode); - -struct fown_struct { - rwlock_t lock; /* protects pid, uid, euid fields */ - struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ - enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ - uid_t uid, euid; /* uid/euid of process setting the owner */ - int signum; /* posix.1b rt signal to be delivered on IO */ -}; - -/* - * Track a single file's readahead state - */ -struct file_ra_state { - pgoff_t start; /* where readahead started */ - unsigned int size; /* # of readahead pages */ - unsigned int async_size; /* do asynchronous readahead when - there are only # of pages ahead */ - - unsigned int ra_pages; /* Maximum readahead window */ - int mmap_miss; /* Cache miss stat for mmap accesses */ - loff_t prev_pos; /* Cache last read() position */ -}; - -/* - * Check if @index falls in the readahead windows. 
- */ -static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) -{ - return (index >= ra->start && - index < ra->start + ra->size); -} - -#define FILE_MNT_WRITE_TAKEN 1 -#define FILE_MNT_WRITE_RELEASED 2 - -struct file { - /* - * fu_list becomes invalid after file_free is called and queued via - * fu_rcuhead for RCU freeing - */ - union { - struct list_head fu_list; - struct rcu_head fu_rcuhead; - } f_u; - struct path f_path; -#define f_dentry f_path.dentry -#define f_vfsmnt f_path.mnt - const struct file_operations *f_op; - atomic_long_t f_count; - unsigned int f_flags; - mode_t f_mode; - loff_t f_pos; - struct fown_struct f_owner; - unsigned int f_uid, f_gid; - xid_t f_xid; - struct file_ra_state f_ra; - - u64 f_version; -#ifdef CONFIG_SECURITY - void *f_security; -#endif - /* needed for tty driver, and maybe others */ - void *private_data; - -#ifdef CONFIG_EPOLL - /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct list_head f_ep_links; - spinlock_t f_ep_lock; -#endif /* #ifdef CONFIG_EPOLL */ - struct address_space *f_mapping; -#ifdef CONFIG_DEBUG_WRITECOUNT - unsigned long f_mnt_write_state; -#endif -}; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); - -#define get_file(x) atomic_long_inc(&(x)->f_count) -#define file_count(x) atomic_long_read(&(x)->f_count) - -#ifdef CONFIG_DEBUG_WRITECOUNT -static inline void file_take_write(struct file *f) -{ - WARN_ON(f->f_mnt_write_state != 0); - f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; -} -static inline void file_release_write(struct file *f) -{ - f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; -} -static inline void file_reset_write(struct file *f) -{ - f->f_mnt_write_state = 0; -} -static inline void file_check_state(struct file *f) -{ - /* - * At this point, either both or neither of these bits - * should be set. - */ - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); -} -static inline int file_check_writeable(struct file *f) -{ - if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) - return 0; - printk(KERN_WARNING "writeable file with no " - "mnt_want_write()\n"); - WARN_ON(1); - return -EINVAL; -} -#else /* !CONFIG_DEBUG_WRITECOUNT */ -static inline void file_take_write(struct file *filp) {} -static inline void file_release_write(struct file *filp) {} -static inline void file_reset_write(struct file *filp) {} -static inline void file_check_state(struct file *filp) {} -static inline int file_check_writeable(struct file *filp) -{ - return 0; -} -#endif /* CONFIG_DEBUG_WRITECOUNT */ - -#define MAX_NON_LFS ((1UL<<31) - 1) - -/* Page cache limit. The filesystems should put that into their s_maxbytes - limits, otherwise bad things can happen in VM. */ -#if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) -#elif BITS_PER_LONG==64 -#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL -#endif - -#define FL_POSIX 1 -#define FL_FLOCK 2 -#define FL_ACCESS 8 /* not trying to lock, just looking */ -#define FL_EXISTS 16 /* when unlocking, test for existence */ -#define FL_LEASE 32 /* lease held on this file */ -#define FL_CLOSE 64 /* unlock on close */ -#define FL_SLEEP 128 /* A blocking lock */ - -/* - * Special return value from posix_lock_file() and vfs_lock_file() for - * asynchronous locking. 
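For concreteness, a worked value for the size limits defined above, assuming the common 4 KiB page size: MAX_NON_LFS is (1UL<<31)-1, i.e. 2 GiB minus one byte, and on a 32-bit build MAX_LFS_FILESIZE is ((u64)4096 << 31) - 1 = 2^43 - 1, just under 8 TiB; 64-bit builds simply allow the largest positive loff_t.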
- */ -#define FILE_LOCK_DEFERRED 1 - -/* - * The POSIX file lock owner is determined by - * the "struct files_struct" in the thread group - * (or NULL for no owner - BSD locks). - * - * Lockd stuffs a "host" pointer into this. - */ -typedef struct files_struct *fl_owner_t; - -struct file_lock_operations { - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); - void (*fl_release_private)(struct file_lock *); -}; - -struct lock_manager_operations { - int (*fl_compare_owner)(struct file_lock *, struct file_lock *); - void (*fl_notify)(struct file_lock *); /* unblock callback */ - int (*fl_grant)(struct file_lock *, struct file_lock *, int); - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); - void (*fl_release_private)(struct file_lock *); - void (*fl_break)(struct file_lock *); - int (*fl_mylease)(struct file_lock *, struct file_lock *); - int (*fl_change)(struct file_lock **, int); -}; - -/* that will die - we need it for nfs_lock_info */ -#include - -struct file_lock { - struct file_lock *fl_next; /* singly linked list for this inode */ - struct list_head fl_link; /* doubly linked list of all locks */ - struct list_head fl_block; /* circular list of blocked processes */ - fl_owner_t fl_owner; - unsigned char fl_flags; - unsigned char fl_type; - unsigned int fl_pid; - struct pid *fl_nspid; - wait_queue_head_t fl_wait; - struct file *fl_file; - loff_t fl_start; - loff_t fl_end; - xid_t fl_xid; - - struct fasync_struct * fl_fasync; /* for lease break notifications */ - unsigned long fl_break_time; /* for nonblocking lease breaks */ - - struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ - struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ - union { - struct nfs_lock_info nfs_fl; - struct nfs4_lock_info nfs4_fl; - struct { - struct list_head link; /* link in AFS vnode's pending_locks list */ - int state; /* state of grant or error if -ve */ - } afs; - } fl_u; -}; - -/* The following constant reflects the upper bound of the file/locking space */ -#ifndef OFFSET_MAX -#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) -#define OFFSET_MAX INT_LIMIT(loff_t) -#define OFFT_OFFSET_MAX INT_LIMIT(off_t) -#endif - -#include - -extern int fcntl_getlk(struct file *, struct flock __user *); -extern int fcntl_setlk(unsigned int, struct file *, unsigned int, - struct flock __user *); - -#if BITS_PER_LONG == 32 -extern int fcntl_getlk64(struct file *, struct flock64 __user *); -extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, - struct flock64 __user *); -#endif - -extern void send_sigio(struct fown_struct *fown, int fd, int band); -extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); -extern int fcntl_getlease(struct file *filp); - -/* fs/sync.c */ -extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, - loff_t endbyte, unsigned int flags); - -/* fs/locks.c */ -extern void locks_init_lock(struct file_lock *); -extern void locks_copy_lock(struct file_lock *, struct file_lock *); -extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); -extern void locks_remove_posix(struct file *, fl_owner_t); -extern void locks_remove_flock(struct file *); -extern void posix_test_lock(struct file *, struct file_lock *); -extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int posix_lock_file_wait(struct file *, struct file_lock *); -extern int posix_unblock_lock(struct file *, struct file_lock *); -extern int vfs_test_lock(struct file *, 
struct file_lock *); -extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); -extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); -extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); -extern int __break_lease(struct inode *inode, unsigned int flags); -extern void lease_get_mtime(struct inode *, struct timespec *time); -extern int generic_setlease(struct file *, long, struct file_lock **); -extern int vfs_setlease(struct file *, long, struct file_lock **); -extern int lease_modify(struct file_lock **, int); -extern int lock_may_read(struct inode *, loff_t start, unsigned long count); -extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -extern struct seq_operations locks_seq_operations; - -struct fasync_struct { - int magic; - int fa_fd; - struct fasync_struct *fa_next; /* singly linked list */ - struct file *fa_file; -}; - -#define FASYNC_MAGIC 0x4601 - -/* SMP safe fasync helpers: */ -extern int fasync_helper(int, struct file *, int, struct fasync_struct **); -/* can be called from interrupts */ -extern void kill_fasync(struct fasync_struct **, int, int); -/* only for net: no internal synchronization */ -extern void __kill_fasync(struct fasync_struct *, int, int); - -extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern int f_setown(struct file *filp, unsigned long arg, int force); -extern void f_delown(struct file *filp); -extern pid_t f_getown(struct file *filp); -extern int send_sigurg(struct fown_struct *fown); - -/* - * Umount options - */ - -#define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ -#define MNT_DETACH 0x00000002 /* Just detach from the tree */ -#define MNT_EXPIRE 0x00000004 /* Mark for expiry */ - -extern struct list_head super_blocks; -extern spinlock_t sb_lock; - -#define sb_entry(list) list_entry((list), struct super_block, s_list) -#define S_BIAS (1<<30) -struct super_block { - struct list_head s_list; /* Keep this first */ - dev_t s_dev; /* search index; _not_ kdev_t */ - unsigned long s_blocksize; - unsigned char s_blocksize_bits; - unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ - struct file_system_type *s_type; - const struct super_operations *s_op; - struct dquot_operations *dq_op; - struct quotactl_ops *s_qcop; - const struct export_operations *s_export_op; - unsigned long s_flags; - unsigned long s_magic; - struct dentry *s_root; - struct rw_semaphore s_umount; - struct mutex s_lock; - int s_count; - int s_need_sync_fs; - atomic_t s_active; -#ifdef CONFIG_SECURITY - void *s_security; -#endif - struct xattr_handler **s_xattr; - - struct list_head s_inodes; /* all inodes */ - struct list_head s_dirty; /* dirty inodes */ - struct list_head s_io; /* parked for writeback */ - struct list_head s_more_io; /* parked for more writeback */ - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ - struct list_head s_files; - /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ - struct list_head s_dentry_lru; /* unused dentry lru */ - int s_nr_dentry_unused; /* # of dentry on lru */ - - struct block_device *s_bdev; - struct mtd_info *s_mtd; - struct list_head s_instances; - struct quota_info s_dquot; /* Diskquota specific options */ - - int s_frozen; - wait_queue_head_t s_wait_unfrozen; - - char s_id[32]; /* Informational name */ - - void *s_fs_info; /* Filesystem private info */ - - /* - * The next field is for VFS *only*. 
No filesystems have any business - * even looking at it. You had been warned. - */ - struct mutex s_vfs_rename_mutex; /* Kludge */ - - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - - /* - * Filesystem subtype. If non-empty the filesystem type field - * in /proc/mounts will be "type.subtype" - */ - char *s_subtype; - - /* - * Saved mount options for lazy filesystems using - * generic_show_options() - */ - char *s_options; -}; - -extern struct timespec current_fs_time(struct super_block *sb); - -/* - * Snapshotting support. - */ -enum { - SB_UNFROZEN = 0, - SB_FREEZE_WRITE = 1, - SB_FREEZE_TRANS = 2, -}; - -#define vfs_check_frozen(sb, level) \ - wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) - -#define get_fs_excl() atomic_inc(&current->fs_excl) -#define put_fs_excl() atomic_dec(&current->fs_excl) -#define has_fs_excl() atomic_read(&current->fs_excl) - -#define is_owner_or_cap(inode) \ - ((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER)) - -/* not quite ready to be deprecated, but... */ -extern void lock_super(struct super_block *); -extern void unlock_super(struct super_block *); - -/* - * VFS helper functions.. - */ -extern int vfs_permission(struct nameidata *, int); -extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); -extern int vfs_mkdir(struct inode *, struct dentry *, int); -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -extern int vfs_symlink(struct inode *, struct dentry *, const char *); -extern int vfs_link(struct dentry *, struct inode *, struct dentry *); -extern int vfs_rmdir(struct inode *, struct dentry *); -extern int vfs_unlink(struct inode *, struct dentry *); -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); - -/* - * VFS dentry helper functions. - */ -extern void dentry_unhash(struct dentry *dentry); - -/* - * VFS file helper functions. - */ -extern int file_permission(struct file *, int); - -/* - * File types - * - * NOTE! These match bits 12..15 of stat.st_mode - * (ie "(i_mode >> 12) & 15"). - */ -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 -#define DT_WHT 14 - -#define OSYNC_METADATA (1<<0) -#define OSYNC_DATA (1<<1) -#define OSYNC_INODE (1<<2) -int generic_osync_inode(struct inode *, struct address_space *, int); - -/* - * This is the "filldir" function type, used by readdir() to let - * the kernel specify what kind of dirent layout it wants to have. - * This allows the kernel to read directories into kernel space or - * to have different dirent layouts depending on the binary type. - */ -typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); - -struct block_device_operations { - int (*open) (struct inode *, struct file *); - int (*release) (struct inode *, struct file *); - int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); - long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); - long (*compat_ioctl) (struct file *, unsigned, unsigned long); - int (*direct_access) (struct block_device *, sector_t, - void **, unsigned long *); - int (*media_changed) (struct gendisk *); - int (*revalidate_disk) (struct gendisk *); - int (*getgeo)(struct block_device *, struct hd_geometry *); - struct module *owner; -}; - -/* These macros are for out of kernel modules to test that - * the kernel supports the unlocked_ioctl and compat_ioctl - * fields in struct file_operations.
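The note above, that the DT_* values match bits 12..15 of st_mode, can be checked with a small stand-alone program (userspace sketch, not kernel code):

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
        /* (mode >> 12) & 15 yields the matching DT_* value for each file type */
        printf("FIFO %2d\n", (S_IFIFO  >> 12) & 15);   /*  1 == DT_FIFO */
        printf("CHR  %2d\n", (S_IFCHR  >> 12) & 15);   /*  2 == DT_CHR  */
        printf("DIR  %2d\n", (S_IFDIR  >> 12) & 15);   /*  4 == DT_DIR  */
        printf("BLK  %2d\n", (S_IFBLK  >> 12) & 15);   /*  6 == DT_BLK  */
        printf("REG  %2d\n", (S_IFREG  >> 12) & 15);   /*  8 == DT_REG  */
        printf("LNK  %2d\n", (S_IFLNK  >> 12) & 15);   /* 10 == DT_LNK  */
        printf("SOCK %2d\n", (S_IFSOCK >> 12) & 15);   /* 12 == DT_SOCK */
        return 0;
}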
*/ -#define HAVE_COMPAT_IOCTL 1 -#define HAVE_UNLOCKED_IOCTL 1 - -/* - * NOTE: - * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl - * can be called without the big kernel lock held in all filesystems. - */ -struct file_operations { - struct module *owner; - loff_t (*llseek) (struct file *, loff_t, int); - ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); - ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); - int (*readdir) (struct file *, void *, filldir_t); - unsigned int (*poll) (struct file *, struct poll_table_struct *); - int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); - long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); - long (*compat_ioctl) (struct file *, unsigned int, unsigned long); - int (*mmap) (struct file *, struct vm_area_struct *); - int (*open) (struct inode *, struct file *); - int (*flush) (struct file *, fl_owner_t id); - int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, struct dentry *, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); - int (*fasync) (int, struct file *, int); - int (*lock) (struct file *, int, struct file_lock *); - ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); - unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - int (*check_flags)(int); - int (*dir_notify)(struct file *filp, unsigned long arg); - int (*flock) (struct file *, int, struct file_lock *); - ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); - ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); - int (*setlease)(struct file *, long, struct file_lock **); -}; - -struct inode_operations { - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); - int (*link) (struct dentry *,struct inode *,struct dentry *); - int (*unlink) (struct inode *,struct dentry *); - int (*symlink) (struct inode *,struct dentry *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); - int (*rmdir) (struct inode *,struct dentry *); - int (*mknod) (struct inode *,struct dentry *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); - void (*truncate_range)(struct inode *, loff_t, loff_t); - long (*fallocate)(struct inode *inode, int mode, loff_t offset, - loff_t len); - int (*sync_flags) (struct inode *); -}; - -struct seq_file; - -ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, - unsigned long 
nr_segs, unsigned long fast_segs, - struct iovec *fast_pointer, - struct iovec **ret_pointer); - -extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); -extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); -extern ssize_t vfs_readv(struct file *, const struct iovec __user *, - unsigned long, loff_t *); -extern ssize_t vfs_writev(struct file *, const struct iovec __user *, - unsigned long, loff_t *); -ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t); - -struct super_operations { - struct inode *(*alloc_inode)(struct super_block *sb); - void (*destroy_inode)(struct inode *); - - void (*dirty_inode) (struct inode *); - int (*write_inode) (struct inode *, int); - void (*drop_inode) (struct inode *); - void (*delete_inode) (struct inode *); - void (*put_super) (struct super_block *); - void (*write_super) (struct super_block *); - int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); - int (*statfs) (struct dentry *, struct kstatfs *); - int (*remount_fs) (struct super_block *, int *, char *); - void (*clear_inode) (struct inode *); - void (*umount_begin) (struct super_block *); - - int (*show_options)(struct seq_file *, struct vfsmount *); - int (*show_stats)(struct seq_file *, struct vfsmount *); -#ifdef CONFIG_QUOTA - ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); - ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); -#endif -}; - -/* - * Inode state bits. Protected by inode_lock. - * - * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, - * I_DIRTY_DATASYNC and I_DIRTY_PAGES. - * - * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, - * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at - * various stages of removing an inode. - * - * Two bits are used for locking and completion notification, I_LOCK and I_SYNC. - * - * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on - * fdatasync(). i_atime is the usual cause. - * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of - * these changes separately from I_DIRTY_SYNC so that we - * don't have to write inode on fdatasync() when only - * mtime has changed in it. - * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. - * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both - * are cleared by unlock_new_inode(), called from iget(). - * I_WILL_FREE Must be set when calling write_inode_now() if i_count - * is zero. I_FREEING must be set when I_WILL_FREE is - * cleared. - * I_FREEING Set when inode is about to be freed but still has dirty - * pages or buffers attached or the inode itself is still - * dirty. - * I_CLEAR Set by clear_inode(). In this state the inode is clean - * and can be destroyed. - * - * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are - * prohibited for many purposes. iget() must wait for - * the inode to be completely released, then create it - * anew. Other functions will just ignore such inodes, - * if appropriate. I_LOCK is used for waiting. - * - * I_LOCK Serves as both a mutex and completion notification. - * New inodes set I_LOCK. If two processes both create - * the same inode, one of them will release its inode and - * wait for I_LOCK to be released before returning. 
- * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can - * also cause waiting on I_LOCK, without I_LOCK actually - * being set. find_inode() uses this to prevent returning - * nearly-dead inodes. - * I_SYNC Similar to I_LOCK, but limited in scope to writeback - * of inode dirty data. Having a separate lock for this - * purpose reduces latency and prevents some filesystem- - * specific deadlocks. - * - * Q: What is the difference between I_WILL_FREE and I_FREEING? - * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on - * I_CLEAR? If not, why? - */ -#define I_DIRTY_SYNC 1 -#define I_DIRTY_DATASYNC 2 -#define I_DIRTY_PAGES 4 -#define I_NEW 8 -#define I_WILL_FREE 16 -#define I_FREEING 32 -#define I_CLEAR 64 -#define __I_LOCK 7 -#define I_LOCK (1 << __I_LOCK) -#define __I_SYNC 8 -#define I_SYNC (1 << __I_SYNC) - -#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) - -extern void __mark_inode_dirty(struct inode *, int); -static inline void mark_inode_dirty(struct inode *inode) -{ - __mark_inode_dirty(inode, I_DIRTY); -} - -static inline void mark_inode_dirty_sync(struct inode *inode) -{ - __mark_inode_dirty(inode, I_DIRTY_SYNC); -} - -/** - * inc_nlink - directly increment an inode's link count - * @inode: inode - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. Currently, - * it is only here for parity with dec_nlink(). - */ -static inline void inc_nlink(struct inode *inode) -{ - inode->i_nlink++; -} - -static inline void inode_inc_link_count(struct inode *inode) -{ - inc_nlink(inode); - mark_inode_dirty(inode); -} - -/** - * drop_nlink - directly drop an inode's link count - * @inode: inode - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. In cases - * where we are attempting to track writes to the - * filesystem, a decrement to zero means an imminent - * write when the file is truncated and actually unlinked - * on the filesystem. - */ -static inline void drop_nlink(struct inode *inode) -{ - inode->i_nlink--; -} - -/** - * clear_nlink - directly zero an inode's link count - * @inode: inode - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. See - * drop_nlink() for why we care about i_nlink hitting zero. - */ -static inline void clear_nlink(struct inode *inode) -{ - inode->i_nlink = 0; -} - -static inline void inode_dec_link_count(struct inode *inode) -{ - drop_nlink(inode); - mark_inode_dirty(inode); -} - -/** - * inode_inc_iversion - increments i_version - * @inode: inode that need to be updated - * - * Every time the inode is modified, the i_version field will be incremented. 
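A hypothetical sketch (invented filesystem name, not from the patch) of how the helpers above are meant to replace direct i_nlink manipulation in a filesystem's ->unlink() method:

static int examplefs_unlink(struct inode *dir, struct dentry *dentry)
{
        struct inode *inode = dentry->d_inode;

        inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(inode->i_sb);
        inode_dec_link_count(inode);    /* drop_nlink() + mark_inode_dirty() */
        return 0;
}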
- * The filesystem has to be mounted with i_version flag - */ - -static inline void inode_inc_iversion(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_version++; - spin_unlock(&inode->i_lock); -} - -extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); -static inline void file_accessed(struct file *file) -{ - if (!(file->f_flags & O_NOATIME)) - touch_atime(file->f_path.mnt, file->f_path.dentry); -} - -int sync_inode(struct inode *inode, struct writeback_control *wbc); - -struct file_system_type { - const char *name; - int fs_flags; - int (*get_sb) (struct file_system_type *, int, - const char *, void *, struct vfsmount *); - void (*kill_sb) (struct super_block *); - struct module *owner; - struct file_system_type * next; - struct list_head fs_supers; - - struct lock_class_key s_lock_key; - struct lock_class_key s_umount_key; - - struct lock_class_key i_lock_key; - struct lock_class_key i_mutex_key; - struct lock_class_key i_mutex_dir_key; - struct lock_class_key i_alloc_sem_key; -}; - -extern int get_sb_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); -extern int get_sb_single(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); -extern int get_sb_nodev(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); -void generic_shutdown_super(struct super_block *sb); -void kill_block_super(struct super_block *sb); -void kill_anon_super(struct super_block *sb); -void kill_litter_super(struct super_block *sb); -void deactivate_super(struct super_block *sb); -int set_anon_super(struct super_block *s, void *data); -struct super_block *sget(struct file_system_type *type, - int (*test)(struct super_block *,void *), - int (*set)(struct super_block *,void *), - void *data); -extern int get_sb_pseudo(struct file_system_type *, char *, - const struct super_operations *ops, unsigned long, - struct vfsmount *mnt); -extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); -int __put_super_and_need_restart(struct super_block *sb); -void unnamed_dev_init(void); - -/* Alas, no aliases. Too much hassle with bringing module.h everywhere */ -#define fops_get(fops) \ - (((fops) && try_module_get((fops)->owner) ? 
(fops) : NULL)) -#define fops_put(fops) \ - do { if (fops) module_put((fops)->owner); } while(0) - -extern int register_filesystem(struct file_system_type *); -extern int unregister_filesystem(struct file_system_type *); -extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); -#define kern_mount(type) kern_mount_data(type, NULL) -extern int may_umount_tree(struct vfsmount *); -extern int may_umount(struct vfsmount *); -extern long do_mount(char *, char *, char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *); -extern void drop_collected_mounts(struct vfsmount *); - -extern int vfs_statfs(struct dentry *, struct kstatfs *); - -/* /sys/fs */ -extern struct kobject *fs_kobj; - -#define FLOCK_VERIFY_READ 1 -#define FLOCK_VERIFY_WRITE 2 - -extern int locks_mandatory_locked(struct inode *); -extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); - -/* - * Candidates for mandatory locking have the setgid bit set - * but no group execute bit - an otherwise meaningless combination. - */ - -static inline int __mandatory_lock(struct inode *ino) -{ - return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; -} - -/* - * ... and these candidates should be on MS_MANDLOCK mounted fs, - * otherwise these will be advisory locks - */ - -static inline int mandatory_lock(struct inode *ino) -{ - return IS_MANDLOCK(ino) && __mandatory_lock(ino); -} - -static inline int locks_verify_locked(struct inode *inode) -{ - if (mandatory_lock(inode)) - return locks_mandatory_locked(inode); - return 0; -} - -extern int rw_verify_area(int, struct file *, loff_t *, size_t); - -static inline int locks_verify_truncate(struct inode *inode, - struct file *filp, - loff_t size) -{ - if (inode->i_flock && mandatory_lock(inode)) - return locks_mandatory_area( - FLOCK_VERIFY_WRITE, inode, filp, - size < inode->i_size ? size : inode->i_size, - (size < inode->i_size ? 
inode->i_size - size - : size - inode->i_size) - ); - return 0; -} - -static inline int break_lease(struct inode *inode, unsigned int mode) -{ - if (inode->i_flock) - return __break_lease(inode, mode); - return 0; -} - -/* fs/open.c */ - -extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, - struct file *filp); -extern long do_sys_open(int dfd, const char __user *filename, int flags, - int mode); -extern struct file *filp_open(const char *, int, int); -extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -extern int filp_close(struct file *, fl_owner_t id); -extern char * getname(const char __user *); - -/* fs/dcache.c */ -extern void __init vfs_caches_init_early(void); -extern void __init vfs_caches_init(unsigned long); - -extern struct kmem_cache *names_cachep; - -#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) -#define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) -#ifndef CONFIG_AUDITSYSCALL -#define putname(name) __putname(name) -#else -extern void putname(const char *name); -#endif - -#ifdef CONFIG_BLOCK -extern int register_blkdev(unsigned int, const char *); -extern void unregister_blkdev(unsigned int, const char *); -extern struct block_device *bdget(dev_t); -extern void bd_set_size(struct block_device *, loff_t size); -extern void bd_forget(struct inode *inode); -extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t, unsigned); -#else -static inline void bd_forget(struct inode *inode) {} -#endif -extern const struct file_operations def_blk_fops; -extern const struct file_operations def_chr_fops; -extern const struct file_operations bad_sock_fops; -extern const struct file_operations def_fifo_fops; -#ifdef CONFIG_BLOCK -extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); -extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); -extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, - struct gendisk *disk, unsigned cmd, - unsigned long arg); -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *, mode_t, unsigned); -extern int blkdev_put(struct block_device *); -extern int bd_claim(struct block_device *, void *); -extern void bd_release(struct block_device *); -#ifdef CONFIG_SYSFS -extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); -extern void bd_release_from_disk(struct block_device *, struct gendisk *); -#else -#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) -#define bd_release_from_disk(bdev, disk) bd_release(bdev) -#endif -#endif - -/* fs/char_dev.c */ -#define CHRDEV_MAJOR_HASH_SIZE 255 -extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); -extern int register_chrdev_region(dev_t, unsigned, const char *); -extern int register_chrdev(unsigned int, const char *, - const struct file_operations *); -extern void unregister_chrdev(unsigned int, const char *); -extern void unregister_chrdev_region(dev_t, unsigned); -extern void chrdev_show(struct seq_file *,off_t); - -/* fs/block_dev.c */ -#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ - -#ifdef CONFIG_BLOCK -#define BLKDEV_MAJOR_HASH_SIZE 255 -extern const char *__bdevname(dev_t, char *buffer); -extern const char *bdevname(struct block_device *bdev, char *buffer); -extern struct block_device *lookup_bdev(const char *); -extern struct block_device *open_bdev_excl(const char *, int, void *); -extern 
void close_bdev_excl(struct block_device *); -extern void blkdev_show(struct seq_file *,off_t); -#else -#define BLKDEV_MAJOR_HASH_SIZE 0 -#endif - -extern void init_special_inode(struct inode *, umode_t, dev_t); - -/* Invalid inode operations -- fs/bad_inode.c */ -extern void make_bad_inode(struct inode *); -extern int is_bad_inode(struct inode *); - -extern const struct file_operations read_pipefifo_fops; -extern const struct file_operations write_pipefifo_fops; -extern const struct file_operations rdwr_pipefifo_fops; - -extern int fs_may_remount_ro(struct super_block *); - -#ifdef CONFIG_BLOCK -/* - * return READ, READA, or WRITE - */ -#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) - -/* - * return data direction, READ or WRITE - */ -#define bio_data_dir(bio) ((bio)->bi_rw & 1) - -extern int check_disk_change(struct block_device *); -extern int __invalidate_device(struct block_device *); -extern int invalidate_partition(struct gendisk *, int); -#endif -extern int invalidate_inodes(struct super_block *); -unsigned long __invalidate_mapping_pages(struct address_space *mapping, - pgoff_t start, pgoff_t end, - bool be_atomic); -unsigned long invalidate_mapping_pages(struct address_space *mapping, - pgoff_t start, pgoff_t end); - -static inline unsigned long __deprecated -invalidate_inode_pages(struct address_space *mapping) -{ - return invalidate_mapping_pages(mapping, 0, ~0UL); -} - -static inline void invalidate_remote_inode(struct inode *inode) -{ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) - invalidate_mapping_pages(inode->i_mapping, 0, -1); -} -extern int invalidate_inode_pages2(struct address_space *mapping); -extern int invalidate_inode_pages2_range(struct address_space *mapping, - pgoff_t start, pgoff_t end); -extern void generic_sync_sb_inodes(struct super_block *sb, - struct writeback_control *wbc); -extern int write_inode_now(struct inode *, int); -extern int filemap_fdatawrite(struct address_space *); -extern int filemap_flush(struct address_space *); -extern int filemap_fdatawait(struct address_space *); -extern int filemap_write_and_wait(struct address_space *mapping); -extern int filemap_write_and_wait_range(struct address_space *mapping, - loff_t lstart, loff_t lend); -extern int wait_on_page_writeback_range(struct address_space *mapping, - pgoff_t start, pgoff_t end); -extern int __filemap_fdatawrite_range(struct address_space *mapping, - loff_t start, loff_t end, int sync_mode); -extern int filemap_fdatawrite_range(struct address_space *mapping, - loff_t start, loff_t end); - -extern long do_fsync(struct file *file, int datasync); -extern void sync_supers(void); -extern void sync_filesystems(int wait); -extern void __fsync_super(struct super_block *sb); -extern void emergency_sync(void); -extern void emergency_remount(void); -extern int do_remount_sb(struct super_block *sb, int flags, - void *data, int force); -#ifdef CONFIG_BLOCK -extern sector_t bmap(struct inode *, sector_t); -#endif -extern int notify_change(struct dentry *, struct iattr *); -extern int inode_permission(struct inode *, int); -extern int generic_permission(struct inode *, int, - int (*check_acl)(struct inode *, int)); - -extern int get_write_access(struct inode *); -extern int deny_write_access(struct file *); -static inline void put_write_access(struct inode * inode) -{ - atomic_dec(&inode->i_writecount); -} -static inline void allow_write_access(struct file *file) -{ - if (file) - atomic_inc(&file->f_path.dentry->d_inode->i_writecount); -} -extern int 
do_pipe(int *); -extern int do_pipe_flags(int *, int); -extern struct file *create_read_pipe(struct file *f, int flags); -extern struct file *create_write_pipe(int flags); -extern void free_write_pipe(struct file *); - -extern struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode); -extern int may_open(struct nameidata *, int, int); - -extern int kernel_read(struct file *, unsigned long, char *, unsigned long); -extern struct file * open_exec(const char *); - -/* fs/dcache.c -- generic fs support functions */ -extern int is_subdir(struct dentry *, struct dentry *); -extern ino_t find_inode_number(struct dentry *, struct qstr *); - -#include - -/* needed for stackable file system support */ -extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - -extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); - -extern void inode_init_once(struct inode *); -extern void iput(struct inode *); -extern struct inode * igrab(struct inode *); -extern ino_t iunique(struct super_block *, ino_t); -extern int inode_needs_sync(struct inode *inode); -extern void generic_delete_inode(struct inode *inode); -extern void generic_drop_inode(struct inode *inode); - -extern struct inode *ilookup5_nowait(struct super_block *sb, - unsigned long hashval, int (*test)(struct inode *, void *), - void *data); -extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data); -extern struct inode *ilookup(struct super_block *sb, unsigned long ino); - -extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); -extern struct inode * iget_locked(struct super_block *, unsigned long); -extern void unlock_new_inode(struct inode *); - -extern void __iget(struct inode * inode); -extern void iget_failed(struct inode *); -extern void clear_inode(struct inode *); -extern void destroy_inode(struct inode *); -extern struct inode *new_inode(struct super_block *); -extern int should_remove_suid(struct dentry *); -extern int file_remove_suid(struct file *); - -extern void __insert_inode_hash(struct inode *, unsigned long hashval); -extern void remove_inode_hash(struct inode *); -static inline void insert_inode_hash(struct inode *inode) { - __insert_inode_hash(inode, inode->i_ino); -} - -extern struct file * get_empty_filp(void); -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); -#ifdef CONFIG_BLOCK -struct bio; -extern void submit_bio(int, struct bio *); -extern int bdev_read_only(struct block_device *); -#endif -extern int set_blocksize(struct block_device *, int); -extern int sb_set_blocksize(struct super_block *, int); -extern int sb_min_blocksize(struct super_block *, int); -extern int sb_has_dirty_inodes(struct super_block *); - -extern int generic_file_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); -extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, - 
unsigned long, loff_t); -extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, - unsigned long *, loff_t, loff_t *, size_t, size_t); -extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t, loff_t *, size_t, ssize_t); -extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); -extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -extern int generic_segment_checks(const struct iovec *iov, - unsigned long *nr_segs, size_t *count, int access_flags); - -/* fs/splice.c */ -extern ssize_t generic_file_splice_read(struct file *, loff_t *, - struct pipe_inode_info *, size_t, unsigned int); -extern ssize_t generic_file_splice_write(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, - struct file *out, loff_t *, size_t len, unsigned int flags); -extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - size_t len, unsigned int flags); - -extern void -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -extern loff_t no_llseek(struct file *file, loff_t offset, int origin); -extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); -extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, - int origin); -extern int generic_file_open(struct inode * inode, struct file * filp); -extern int nonseekable_open(struct inode * inode, struct file * filp); - -#ifdef CONFIG_FS_XIP -extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, - loff_t *ppos); -extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); -extern ssize_t xip_file_write(struct file *filp, const char __user *buf, - size_t len, loff_t *ppos); -extern int xip_truncate_page(struct address_space *mapping, loff_t from); -#else -static inline int xip_truncate_page(struct address_space *mapping, loff_t from) -{ - return 0; -} -#endif - -#ifdef CONFIG_BLOCK -ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - int lock_type); - -enum { - DIO_LOCKING = 1, /* need locking between buffered and direct access */ - DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */ - DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */ -}; - -static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_LOCKING); -} - -static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_NO_LOCKING); -} - -static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const 
struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_OWN_LOCKING); -} -#endif - -extern const struct file_operations generic_ro_fops; - -#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) - -extern int vfs_readlink(struct dentry *, char __user *, int, const char *); -extern int vfs_follow_link(struct nameidata *, const char *); -extern int page_readlink(struct dentry *, char __user *, int); -extern void *page_follow_link_light(struct dentry *, struct nameidata *); -extern void page_put_link(struct dentry *, struct nameidata *, void *); -extern int __page_symlink(struct inode *inode, const char *symname, int len, - int nofs); -extern int page_symlink(struct inode *inode, const char *symname, int len); -extern const struct inode_operations page_symlink_inode_operations; -extern int generic_readlink(struct dentry *, char __user *, int); -extern void generic_fillattr(struct inode *, struct kstat *); -extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -void inode_add_bytes(struct inode *inode, loff_t bytes); -void inode_sub_bytes(struct inode *inode, loff_t bytes); -loff_t inode_get_bytes(struct inode *inode); -void inode_set_bytes(struct inode *inode, loff_t bytes); - -extern int vfs_readdir(struct file *, filldir_t, void *); - -extern int vfs_stat(char __user *, struct kstat *); -extern int vfs_lstat(char __user *, struct kstat *); -extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); -extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); -extern int vfs_fstat(unsigned int, struct kstat *); - -extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, - unsigned long arg); - -extern void get_filesystem(struct file_system_type *fs); -extern void put_filesystem(struct file_system_type *fs); -extern struct file_system_type *get_fs_type(const char *name); -extern struct super_block *get_super(struct block_device *); -extern struct super_block *user_get_super(dev_t); -extern void drop_super(struct super_block *sb); - -extern int dcache_dir_open(struct inode *, struct file *); -extern int dcache_dir_close(struct inode *, struct file *); -extern loff_t dcache_dir_lseek(struct file *, loff_t, int); -extern int dcache_readdir(struct file *, void *, filldir_t); -extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *)); -extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int simple_statfs(struct dentry *, struct kstatfs *); -extern int simple_link(struct dentry *, struct inode *, struct dentry *); -extern int simple_unlink(struct inode *, struct dentry *); -extern int simple_rmdir(struct inode *, struct dentry *); -extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -extern int simple_sync_file(struct file *, struct dentry *, int); -extern int simple_empty(struct dentry *); -extern int simple_readpage(struct file *file, struct page *page); -extern int simple_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to); -extern int simple_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); -extern int simple_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void 
*fsdata); - -extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); -extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); -extern const struct file_operations simple_dir_operations; -extern const struct inode_operations simple_dir_inode_operations; -struct tree_descr { char *name; const struct file_operations *ops; int mode; }; -struct dentry *d_alloc_name(struct dentry *, const char *); -extern int simple_fill_super(struct super_block *, int, struct tree_descr *); -extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); -extern void simple_release_fs(struct vfsmount **mount, int *count); - -extern ssize_t simple_read_from_buffer(void __user *to, size_t count, - loff_t *ppos, const void *from, size_t available); - -#ifdef CONFIG_MIGRATION -extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *); -#else -#define buffer_migrate_page NULL -#endif - -extern int inode_change_ok(struct inode *, struct iattr *); -extern int __must_check inode_setattr(struct inode *, struct iattr *); - -extern void file_update_time(struct file *file); - -extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); -extern void save_mount_options(struct super_block *sb, char *options); - -static inline ino_t parent_ino(struct dentry *dentry) -{ - ino_t res; - - spin_lock(&dentry->d_lock); - res = dentry->d_parent->d_inode->i_ino; - spin_unlock(&dentry->d_lock); - return res; -} - -/* Transaction based IO helpers */ - -/* - * An argresp is stored in an allocated page and holds the - * size of the argument or response, along with its content - */ -struct simple_transaction_argresp { - ssize_t size; - char data[0]; -}; - -#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) - -char *simple_transaction_get(struct file *file, const char __user *buf, - size_t size); -ssize_t simple_transaction_read(struct file *file, char __user *buf, - size_t size, loff_t *pos); -int simple_transaction_release(struct inode *inode, struct file *file); - -static inline void simple_transaction_set(struct file *file, size_t n) -{ - struct simple_transaction_argresp *ar = file->private_data; - - BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); - - /* - * The barrier ensures that ar->size will really remain zero until - * ar->data is ready for reading. - */ - smp_mb(); - ar->size = n; -} - -/* - * simple attribute files - * - * These attributes behave similar to those in sysfs: - * - * Writing to an attribute immediately sets a value, an open file can be - * written to multiple times. - * - * Reading from an attribute creates a buffer from the value that might get - * read with multiple read calls. When the attribute has been read - * completely, no further read calls are possible until the file is opened - * again. - * - * All attributes contain a text representation of a numeric value - * that are accessed with the get() and set() functions. 
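The behaviour described above is what the DEFINE_SIMPLE_ATTRIBUTE() macro just below packages up; a hypothetical usage sketch (names invented) exposing a single u64 value as a text attribute:

static u64 example_value;

static int example_get(void *data, u64 *val)
{
        *val = example_value;
        return 0;
}

static int example_set(void *data, u64 val)
{
        example_value = val;
        return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");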
- */ -#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ -static int __fops ## _open(struct inode *inode, struct file *file) \ -{ \ - __simple_attr_check_format(__fmt, 0ull); \ - return simple_attr_open(inode, file, __get, __set, __fmt); \ -} \ -static struct file_operations __fops = { \ - .owner = THIS_MODULE, \ - .open = __fops ## _open, \ - .release = simple_attr_release, \ - .read = simple_attr_read, \ - .write = simple_attr_write, \ -}; - -static inline void __attribute__((format(printf, 1, 2))) -__simple_attr_check_format(const char *fmt, ...) -{ - /* don't do anything, just let the compiler check the arguments; */ -} - -int simple_attr_open(struct inode *inode, struct file *file, - int (*get)(void *, u64 *), int (*set)(void *, u64), - const char *fmt); -int simple_attr_release(struct inode *inode, struct file *file); -ssize_t simple_attr_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos); -ssize_t simple_attr_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos); - - -#ifdef CONFIG_SECURITY -static inline char *alloc_secdata(void) -{ - return (char *)get_zeroed_page(GFP_KERNEL); -} - -static inline void free_secdata(void *secdata) -{ - free_page((unsigned long)secdata); -} -#else -static inline char *alloc_secdata(void) -{ - return (char *)1; -} - -static inline void free_secdata(void *secdata) -{ } -#endif /* CONFIG_SECURITY */ - -struct ctl_table; -int proc_nr_files(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos); - -int get_filesystem_list(char * buf); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_FS_H */ diff -Nurb linux-2.6.27-720/include/linux/highmem.h linux-2.6.27-710/include/linux/highmem.h --- linux-2.6.27-720/include/linux/highmem.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/highmem.h 2008-10-09 18:13:53.000000000 -0400 @@ -165,8 +165,8 @@ { char *vfrom, *vto; - vfrom = (char*)kmap_atomic(from, KM_USER0); - vto = (char*)kmap_atomic(to, KM_USER1); + vfrom = kmap_atomic(from, KM_USER0); + vto = kmap_atomic(to, KM_USER1); copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); @@ -178,8 +178,8 @@ { char *vfrom, *vto; - vfrom = (char*)kmap_atomic(from, KM_USER0); - vto = (char*)kmap_atomic(to, KM_USER1); + vfrom = kmap_atomic(from, KM_USER0); + vto = kmap_atomic(to, KM_USER1); copy_page(vto, vfrom); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); diff -Nurb linux-2.6.27-720/include/linux/hrtimer.h linux-2.6.27-710/include/linux/hrtimer.h --- linux-2.6.27-720/include/linux/hrtimer.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/hrtimer.h 2008-10-09 18:13:53.000000000 -0400 @@ -384,7 +384,7 @@ static inline void timer_stats_account_hrtimer(struct hrtimer *timer) { timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - (void *) timer->function, timer->start_comm, 0); + timer->function, timer->start_comm, 0); } extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, diff -Nurb linux-2.6.27-720/include/linux/inetdevice.h linux-2.6.27-710/include/linux/inetdevice.h --- linux-2.6.27-720/include/linux/inetdevice.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/inetdevice.h 2008-10-09 18:13:53.000000000 -0400 @@ -166,7 +166,7 @@ static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { - struct in_device *in_dev = (struct in_device *) dev->ip_ptr; + struct in_device *in_dev = dev->ip_ptr; 
if (in_dev) in_dev = rcu_dereference(in_dev); return in_dev; diff -Nurb linux-2.6.27-720/include/linux/jhash.h linux-2.6.27-710/include/linux/jhash.h --- linux-2.6.27-720/include/linux/jhash.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/jhash.h 2008-10-09 18:13:53.000000000 -0400 @@ -44,7 +44,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c, len; - const u8 *k = (const u8 *) key; + const u8 *k = key; len = length; a = b = JHASH_GOLDEN_RATIO; diff -Nurb linux-2.6.27-720/include/linux/kernel.h linux-2.6.27-710/include/linux/kernel.h --- linux-2.6.27-720/include/linux/kernel.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/kernel.h 2008-10-09 18:13:53.000000000 -0400 @@ -213,7 +213,7 @@ { return false; } #endif -asmlinkage void __attribute__((format(printf, 1, 2))) +extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); unsigned long int_sqrt(unsigned long); diff -Nurb linux-2.6.27-720/include/linux/ktime.h linux-2.6.27-710/include/linux/ktime.h --- linux-2.6.27-720/include/linux/ktime.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/ktime.h 2008-10-09 18:13:53.000000000 -0400 @@ -71,12 +71,6 @@ #if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) -#ifdef __cplusplus -# define KTIME_TV64(__s) ({ ktime_t __kt; __kt.tv64 = (__s); __kt; }) -#else -# define KTIME_TV64(__s) ((ktime_t) { .tv64 = (__s) }) -#endif - /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value * @secs: seconds to set @@ -88,37 +82,32 @@ { #if (BITS_PER_LONG == 64) if (unlikely(secs >= KTIME_SEC_MAX)) - return KTIME_TV64(KTIME_MAX); + return (ktime_t){ .tv64 = KTIME_MAX }; #endif - return KTIME_TV64((s64)secs * NSEC_PER_SEC + (s64)nsecs); + return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs }; } /* Subtract two ktime_t variables. rem = lhs -rhs: */ #define ktime_sub(lhs, rhs) \ - KTIME_TV64((lhs).tv64 - (rhs).tv64) + ({ (ktime_t){ .tv64 = (lhs).tv64 - (rhs).tv64 }; }) /* Add two ktime_t variables. res = lhs + rhs: */ #define ktime_add(lhs, rhs) \ - KTIME_TV64((lhs).tv64 + (rhs).tv64) + ({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; }) /* * Add a ktime_t variable and a scalar nanosecond value. 
* res = kt + nsval: */ #define ktime_add_ns(kt, nsval) \ - KTIME_TV64((kt).tv64 + (nsval)) + ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; }) /* * Subtract a scalar nanosecod from a ktime_t variable * res = kt - nsval: */ -#ifdef __cplusplus -#define ktime_sub_ns(kt, nsval) \ - ({ktime_t duh; duh.tv64 = (kt).tv64 - (nsval); duh; }) -#else #define ktime_sub_ns(kt, nsval) \ ({ (ktime_t){ .tv64 = (kt).tv64 - (nsval) }; }) -#endif /* convert a timespec to ktime_t format: */ static inline ktime_t timespec_to_ktime(struct timespec ts) @@ -143,18 +132,6 @@ #else -#ifdef __cplusplus -# define KTIME_TV64(__s) ({ ktime_t __kt; __kt.tv64 = (__s); __kt; }) -# define KTIME_SEC_NSEC(__sec, __nsec) ({ ktime_t __kt; __kt.tv.sec = (__sec); __kt.tv.nsec = (__nsec); __kt; }) -# define TIMEVAL_SEC_USEC(__sec, __usec) ({ struct timeval __tv; __tv.tv_sec = (__sec); __tv.tv_usec = (__usec); __tv; }) -# define TIMESPEC_SEC_NSEC(__sec, __nsec) ({ struct timespec __ts; __ts.tv_sec = (__sec); __ts.tv_nsec = (__nsec); __ts; }) -#else -# define KTIME_TV64(__s) ((ktime_t) { .tv64 = (__s) }) -# define KTIME_SEC_NSEC(__sec, __nsec) ((ktime_t) { .tv = { .sec = (__sec), .nsec = (__nsec) } }) -# define TIMEVAL_SEC_USEC(__sec, __usec) ((struct timeval) { .tv_sec = (__sec), .tv_usec = (__usec) }) -# define TIMESPEC_SEC_NSEC(__sec, __nsec) ((struct timespec) { .tv_sec = (__sec), .tv_nsec = (__nsec) }) -#endif - /* * Helper macros/inlines to get the ktime_t math right in the timespec * representation. The macros are sometimes ugly - their actual use is @@ -173,7 +150,7 @@ /* Set a ktime_t variable to a value in sec/nsec representation: */ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) { - return KTIME_SEC_NSEC(secs, nsecs); + return (ktime_t) { .tv = { .sec = secs, .nsec = nsecs } }; } /** @@ -246,7 +223,8 @@ */ static inline ktime_t timespec_to_ktime(const struct timespec ts) { - return KTIME_SEC_NSEC((s32)ts.tv_sec, (s32)ts.tv_nsec); + return (ktime_t) { .tv = { .sec = (s32)ts.tv_sec, + .nsec = (s32)ts.tv_nsec } }; } /** @@ -257,7 +235,8 @@ */ static inline ktime_t timeval_to_ktime(const struct timeval tv) { - return KTIME_SEC_NSEC((s32)tv.tv_sec, (s32)tv.tv_usec * 1000); + return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec, + .nsec = (s32)tv.tv_usec * 1000 } }; } /** @@ -268,7 +247,8 @@ */ static inline struct timespec ktime_to_timespec(const ktime_t kt) { - return TIMESPEC_SEC_NSEC((time_t) kt.tv.sec, (long) kt.tv.nsec); + return (struct timespec) { .tv_sec = (time_t) kt.tv.sec, + .tv_nsec = (long) kt.tv.nsec }; } /** @@ -279,8 +259,9 @@ */ static inline struct timeval ktime_to_timeval(const ktime_t kt) { - return TIMEVAL_SEC_USEC((time_t) kt.tv.sec, - (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC)); + return (struct timeval) { + .tv_sec = (time_t) kt.tv.sec, + .tv_usec = (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC) }; } /** @@ -348,7 +329,7 @@ static inline ktime_t ns_to_ktime(u64 ns) { - static const ktime_t ktime_zero = ({ktime_t duh; duh.tv64 = 0;duh;}); + static const ktime_t ktime_zero = { .tv64 = 0 }; return ktime_add_ns(ktime_zero, ns); } diff -Nurb linux-2.6.27-720/include/linux/linkage.h linux-2.6.27-710/include/linux/linkage.h --- linux-2.6.27-720/include/linux/linkage.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/linkage.h 2008-10-09 18:13:53.000000000 -0400 @@ -11,13 +11,6 @@ #else #define CPP_ASMLINKAGE #endif -#ifndef extern_asmlinkage -# ifdef __cplusplus -# define extern_asmlinkage asmlinkage -# else -# define extern_asmlinkage extern asmlinkage -# endif 
-#endif #ifndef asmlinkage #define asmlinkage CPP_ASMLINKAGE diff -Nurb linux-2.6.27-720/include/linux/list.h linux-2.6.27-710/include/linux/list.h --- linux-2.6.27-720/include/linux/list.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/list.h 2008-10-09 18:13:53.000000000 -0400 @@ -20,11 +20,7 @@ struct list_head *next, *prev; }; -#ifdef __cplusplus -#define LIST_HEAD_INIT(name) ({struct list_head duh;duh.next=&(name);duh.prev=&(name);duh;}) -#else #define LIST_HEAD_INIT(name) { &(name), &(name) } -#endif #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) @@ -107,8 +103,8 @@ static inline void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); - entry->next = (struct list_head*)(LIST_POISON1); - entry->prev = (struct list_head*)(LIST_POISON2); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; } #else extern void list_del(struct list_head *entry); @@ -580,8 +576,8 @@ static inline void hlist_del(struct hlist_node *n) { __hlist_del(n); - n->next = (struct hlist_node*)(LIST_POISON1); - n->pprev = (struct hlist_node**)(LIST_POISON2); + n->next = LIST_POISON1; + n->pprev = LIST_POISON2; } static inline void hlist_del_init(struct hlist_node *n) diff -Nurb linux-2.6.27-720/include/linux/mempolicy.h linux-2.6.27-710/include/linux/mempolicy.h --- linux-2.6.27-720/include/linux/mempolicy.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/mempolicy.h 2008-10-09 18:13:53.000000000 -0400 @@ -241,7 +241,7 @@ #else -EMPTY_STRUCT_DECL(mempolicy); +struct mempolicy {}; static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b) { @@ -271,7 +271,7 @@ return NULL; } -EMPTY_STRUCT_DECL(shared_policy); +struct shared_policy {}; static inline int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma, diff -Nurb linux-2.6.27-720/include/linux/mm.h linux-2.6.27-710/include/linux/mm.h --- linux-2.6.27-720/include/linux/mm.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/mm.h 2009-05-04 12:15:01.000000000 -0400 @@ -326,7 +326,7 @@ static inline void set_compound_page_dtor(struct page *page, compound_page_dtor *dtor) { - page[1].lru.next = (struct list_head *)dtor; + page[1].lru.next = (void *)dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) @@ -343,7 +343,7 @@ static inline void set_compound_order(struct page *page, unsigned long order) { - page[1].lru.prev = (struct list_head *)order; + page[1].lru.prev = (void *)order; } /* @@ -493,7 +493,7 @@ static inline enum zone_type page_zonenum(struct page *page) { - return (enum zone_type) ((page->flags >> ZONES_PGSHIFT) & ZONES_MASK); + return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } /* diff -Nurb linux-2.6.27-720/include/linux/mm.h.orig linux-2.6.27-710/include/linux/mm.h.orig --- linux-2.6.27-720/include/linux/mm.h.orig 2009-05-04 12:15:01.000000000 -0400 +++ linux-2.6.27-710/include/linux/mm.h.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,1289 +0,0 @@ -#ifndef _LINUX_MM_H -#define _LINUX_MM_H - -#include - -#ifdef __KERNEL__ - -#include -#include -#include -#include -#include -#include -#include - -struct mempolicy; -struct anon_vma; -struct file_ra_state; -struct user_struct; -struct writeback_control; - -#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ -extern unsigned long max_mapnr; -#endif - -extern unsigned long num_physpages; -extern void * high_memory; -extern int page_cluster; - -#ifdef CONFIG_SYSCTL -extern int sysctl_legacy_va_layout; -#else 
-#define sysctl_legacy_va_layout 0 -#endif - -extern unsigned long mmap_min_addr; - -#include -#include -#include - -#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) - -/* - * Linux kernel virtual memory manager primitives. - * The idea being to have a "virtual" mm in the same way - * we have a virtual fs - giving a cleaner interface to the - * mm details, and allowing different kinds of memory mappings - * (from shared memory to executable loading to arbitrary - * mmap() functions). - */ - -extern struct kmem_cache *vm_area_cachep; - -/* - * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is - * disabled, then there's a single shared list of VMAs maintained by the - * system, and mm's subscribe to these individually - */ -struct vm_list_struct { - struct vm_list_struct *next; - struct vm_area_struct *vma; -}; - -#ifndef CONFIG_MMU -extern struct rb_root nommu_vma_tree; -extern struct rw_semaphore nommu_vma_sem; - -extern unsigned int kobjsize(const void *objp); -#endif - -/* - * vm_flags in vm_area_struct, see mm_types.h. - */ -#define VM_READ 0x00000001 /* currently active flags */ -#define VM_WRITE 0x00000002 -#define VM_EXEC 0x00000004 -#define VM_SHARED 0x00000008 - -/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ -#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ -#define VM_MAYWRITE 0x00000020 -#define VM_MAYEXEC 0x00000040 -#define VM_MAYSHARE 0x00000080 - -#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ -#define VM_GROWSUP 0x00000200 -#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ -#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. 
*/ - -#define VM_EXECUTABLE 0x00001000 -#define VM_LOCKED 0x00002000 -#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ - - /* Used by sys_madvise() */ -#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ -#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ - -#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ -#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */ -#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ -#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ -#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ -#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ -#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ -#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ - -#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ -#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ -#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ - -#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ -#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS -#endif - -#ifdef CONFIG_STACK_GROWSUP -#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) -#else -#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) -#endif - -#define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) -#define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK -#define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK)) -#define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) -#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) - -/* - * mapping from the currently active vm_flags protection bits (the - * low four bits) to a page protection mask.. - */ -extern pgprot_t protection_map[16]; - -#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ -#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ - - -/* - * vm_fault is filled by the the pagefault handler and passed to the vma's - * ->fault function. The vma's ->fault is responsible for returning a bitmask - * of VM_FAULT_xxx flags that give details about how the fault was handled. - * - * pgoff should be used in favour of virtual_address, if possible. If pgoff - * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear - * mapping support. - */ -struct vm_fault { - unsigned int flags; /* FAULT_FLAG_xxx flags */ - pgoff_t pgoff; /* Logical page offset based on vma */ - void __user *virtual_address; /* Faulting virtual address */ - - struct page *page; /* ->fault handlers should return a - * page here, unless VM_FAULT_NOPAGE - * is set (which is also implied by - * VM_FAULT_ERROR). - */ -}; - -/* - * These are the virtual MM functions - opening of an area, closing and - * unmapping it (needed to keep files on disk up-to-date etc), pointer - * to the functions called when a no-page or a wp-page exception occurs. 
- */ -struct vm_operations_struct { - void (*open)(struct vm_area_struct * area); - void (*close)(struct vm_area_struct * area); - int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); - - /* notification that a previously read-only page is about to become - * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); - - /* called by access_process_vm when get_user_pages() fails, typically - * for use by special VMAs that can switch between memory and hardware - */ - int (*access)(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write); -#ifdef CONFIG_NUMA - /* - * set_policy() op must add a reference to any non-NULL @new mempolicy - * to hold the policy upon return. Caller should pass NULL @new to - * remove a policy and fall back to surrounding context--i.e. do not - * install a MPOL_DEFAULT policy, nor the task or system default - * mempolicy. - */ - int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); - - /* - * get_policy() op must add reference [mpol_get()] to any policy at - * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure - * in mm/mempolicy.c will do this automatically. - * get_policy() must NOT add a ref if the policy at (vma,addr) is not - * marked as MPOL_SHARED. vma policies are protected by the mmap_sem. - * If no [shared/vma] mempolicy exists at the addr, get_policy() op - * must return NULL--i.e., do not "fallback" to task or system default - * policy. - */ - struct mempolicy *(*get_policy)(struct vm_area_struct *vma, - unsigned long addr); - int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, - const nodemask_t *to, unsigned long flags); -#endif -}; - -struct mmu_gather; -struct inode; - -#define page_private(page) ((page)->private) -#define set_page_private(page, v) ((page)->private = (v)) - -/* - * FIXME: take this include out, include page-flags.h in - * files which need it (119 of them) - */ -#include - -#ifdef CONFIG_DEBUG_VM -#define VM_BUG_ON(cond) BUG_ON(cond) -#else -#define VM_BUG_ON(condition) do { } while(0) -#endif - -/* - * Methods to modify the page usage count. - * - * What counts for a page usage: - * - cache mapping (page->mapping) - * - private data (page->private) - * - page mapped in a task's page tables, each mapping - * is counted separately - * - * Also, many kernel routines increase the page count before a critical - * routine so they can be sure the page doesn't go away from under them. - */ - -/* - * Drop a ref, return true if the refcount fell to zero (the page has no users) - */ -static inline int put_page_testzero(struct page *page) -{ - VM_BUG_ON(atomic_read(&page->_count) == 0); - return atomic_dec_and_test(&page->_count); -} - -/* - * Try to grab a ref unless the page has a refcount of zero, return false if - * that is the case. - */ -static inline int get_page_unless_zero(struct page *page) -{ - return atomic_inc_not_zero(&page->_count); -} - -/* Support for virtually mapped pages */ -struct page *vmalloc_to_page(const void *addr); -unsigned long vmalloc_to_pfn(const void *addr); - -/* - * Determine if an address is within the vmalloc range - * - * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there - * is no special casing required. 
- */ -static inline int is_vmalloc_addr(const void *x) -{ -#ifdef CONFIG_MMU - unsigned long addr = (unsigned long)x; - - return addr >= VMALLOC_START && addr < VMALLOC_END; -#else - return 0; -#endif -} - -static inline struct page *compound_head(struct page *page) -{ - if (unlikely(PageTail(page))) - return page->first_page; - return page; -} - -static inline int page_count(struct page *page) -{ - return atomic_read(&compound_head(page)->_count); -} - -static inline void get_page(struct page *page) -{ - page = compound_head(page); - VM_BUG_ON(atomic_read(&page->_count) == 0); - atomic_inc(&page->_count); -} - -static inline struct page *virt_to_head_page(const void *x) -{ - struct page *page = virt_to_page(x); - return compound_head(page); -} - -/* - * Setup the page count before being freed into the page allocator for - * the first time (boot or memory hotplug) - */ -static inline void init_page_count(struct page *page) -{ - atomic_set(&page->_count, 1); -} - -void put_page(struct page *page); -void put_pages_list(struct list_head *pages); - -void split_page(struct page *page, unsigned int order); - -/* - * Compound pages have a destructor function. Provide a - * prototype for that function and accessor functions. - * These are _only_ valid on the head of a PG_compound page. - */ -typedef void compound_page_dtor(struct page *); - -static inline void set_compound_page_dtor(struct page *page, - compound_page_dtor *dtor) -{ - page[1].lru.next = (void *)dtor; -} - -static inline compound_page_dtor *get_compound_page_dtor(struct page *page) -{ - return (compound_page_dtor *)page[1].lru.next; -} - -static inline int compound_order(struct page *page) -{ - if (!PageHead(page)) - return 0; - return (unsigned long)page[1].lru.prev; -} - -static inline void set_compound_order(struct page *page, unsigned long order) -{ - page[1].lru.prev = (void *)order; -} - -/* - * Multiple processes may "see" the same page. E.g. for untouched - * mappings of /dev/null, all processes see the same page full of - * zeroes, and text pages of executables and shared libraries have - * only one copy in memory, at most, normally. - * - * For the non-reserved pages, page_count(page) denotes a reference count. - * page_count() == 0 means the page is free. page->lru is then used for - * freelist management in the buddy allocator. - * page_count() > 0 means the page has been allocated. - * - * Pages are allocated by the slab allocator in order to provide memory - * to kmalloc and kmem_cache_alloc. In this case, the management of the - * page, and the fields in 'struct page' are the responsibility of mm/slab.c - * unless a particular usage is carefully commented. (the responsibility of - * freeing the kmalloc memory is the caller's, of course). - * - * A page may be used by anyone else who does a __get_free_page(). - * In this case, page_count still tracks the references, and should only - * be used through the normal accessor functions. The top bits of page->flags - * and page->virtual store page management information, but all other fields - * are unused and could be used privately, carefully. The management of this - * page is the responsibility of the one who allocated it, and those who have - * subsequently been given references to it. - * - * The other pages (we may call them "pagecache pages") are completely - * managed by the Linux memory manager: I/O, buffers, swapping etc. - * The following discussion applies only to them. 
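The refcount helpers quoted above (put_page_testzero, get_page_unless_zero, get_page) encode two rules: a dropper learns whether it removed the last reference, and a speculative getter may only succeed while the count is still positive. A stand-alone model of those two operations using C11 stdatomic (not the kernel's atomic_t), with made-up names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page_model { atomic_int count; };

/* Drop a reference; true means it was the last one and the caller may free. */
static bool put_testzero(struct page_model *p)
{
    return atomic_fetch_sub(&p->count, 1) == 1;
}

/* Take a reference only if the object still has users (count > 0). */
static bool get_unless_zero(struct page_model *p)
{
    int c = atomic_load(&p->count);
    while (c > 0)
        if (atomic_compare_exchange_weak(&p->count, &c, c + 1))
            return true;
    return false;
}

int main(void)
{
    struct page_model pg;
    atomic_init(&pg.count, 1);

    printf("extra ref taken: %d\n", get_unless_zero(&pg));     /* 1 */
    printf("last put: %d\n", put_testzero(&pg));                /* 0, one ref remains */
    printf("last put: %d\n", put_testzero(&pg));                /* 1, count hit zero */
    printf("ref on dead object: %d\n", get_unless_zero(&pg));   /* 0 */
    return 0;
}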
- * - * A pagecache page contains an opaque `private' member, which belongs to the - * page's address_space. Usually, this is the address of a circular list of - * the page's disk buffers. PG_private must be set to tell the VM to call - * into the filesystem to release these pages. - * - * A page may belong to an inode's memory mapping. In this case, page->mapping - * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_CACHE_SIZE. - * - * If pagecache pages are not associated with an inode, they are said to be - * anonymous pages. These may become associated with the swapcache, and in that - * case PG_swapcache is set, and page->private is an offset into the swapcache. - * - * In either case (swapcache or inode backed), the pagecache itself holds one - * reference to the page. Setting PG_private should also increment the - * refcount. The each user mapping also has a reference to the page. - * - * The pagecache pages are stored in a per-mapping radix tree, which is - * rooted at mapping->page_tree, and indexed by offset. - * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space - * lists, we instead now tag pages as dirty/writeback in the radix tree. - * - * All pagecache pages may be subject to I/O: - * - inode pages may need to be read from disk, - * - inode pages which have been modified and are MAP_SHARED may need - * to be written back to the inode on disk, - * - anonymous pages (including MAP_PRIVATE file mappings) which have been - * modified may need to be swapped out to swap space and (later) to be read - * back into memory. - */ - -/* - * The zone field is never updated after free_area_init_core() - * sets it, so none of the operations on it need to be atomic. - */ - - -/* - * page->flags layout: - * - * There are three possibilities for how page->flags get - * laid out. The first is for the normal case, without - * sparsemem. The second is for sparsemem when there is - * plenty of space for node and section. The last is when - * we have run out of space and have to fall back to an - * alternate (slower) way of determining the node. - * - * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | - * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | - * classic sparse no space for node: | SECTION | ZONE | ... | FLAGS | - */ -#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) -#define SECTIONS_WIDTH SECTIONS_SHIFT -#else -#define SECTIONS_WIDTH 0 -#endif - -#define ZONES_WIDTH ZONES_SHIFT - -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS -#define NODES_WIDTH NODES_SHIFT -#else -#ifdef CONFIG_SPARSEMEM_VMEMMAP -#error "Vmemmap: No space for nodes field in page flags" -#endif -#define NODES_WIDTH 0 -#endif - -/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ -#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) -#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) -#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) - -/* - * We are going to use the flags for the page to node mapping if its in - * there. This includes the case where there is no node, so it is implicit. - */ -#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) -#define NODE_NOT_IN_PAGE_FLAGS -#endif - -#ifndef PFN_SECTION_SHIFT -#define PFN_SECTION_SHIFT 0 -#endif - -/* - * Define the bit shifts to access each section. For non-existant - * sections we define the shift as 0; that plus a 0 mask ensures - * the compiler will optimise away reference to them. 
- */ -#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) -#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) -#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) - -/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */ -#ifdef NODE_NOT_IN_PAGEFLAGS -#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) -#define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \ - SECTIONS_PGOFF : ZONES_PGOFF) -#else -#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) -#define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF)? \ - NODES_PGOFF : ZONES_PGOFF) -#endif - -#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) - -#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS -#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS -#endif - -#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) -#define NODES_MASK ((1UL << NODES_WIDTH) - 1) -#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) - -static inline enum zone_type page_zonenum(struct page *page) -{ - return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; -} - -/* - * The identification function is only used by the buddy allocator for - * determining if two pages could be buddies. We are not really - * identifying a zone since we could be using a the section number - * id if we have not node id available in page flags. - * We guarantee only that it will return the same value for two - * combinable pages in a zone. - */ -static inline int page_zone_id(struct page *page) -{ - return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; -} - -static inline int zone_to_nid(struct zone *zone) -{ -#ifdef CONFIG_NUMA - return zone->node; -#else - return 0; -#endif -} - -#ifdef NODE_NOT_IN_PAGE_FLAGS -extern int page_to_nid(struct page *page); -#else -static inline int page_to_nid(struct page *page) -{ - return (page->flags >> NODES_PGSHIFT) & NODES_MASK; -} -#endif - -static inline struct zone *page_zone(struct page *page) -{ - return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; -} - -#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) -static inline unsigned long page_to_section(struct page *page) -{ - return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; -} -#endif - -static inline void set_page_zone(struct page *page, enum zone_type zone) -{ - page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); - page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; -} - -static inline void set_page_node(struct page *page, unsigned long node) -{ - page->flags &= ~(NODES_MASK << NODES_PGSHIFT); - page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; -} - -static inline void set_page_section(struct page *page, unsigned long section) -{ - page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); - page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; -} - -static inline void set_page_links(struct page *page, enum zone_type zone, - unsigned long node, unsigned long pfn) -{ - set_page_zone(page, zone); - set_page_node(page, node); - set_page_section(page, pfn_to_section_nr(pfn)); -} - -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ -#ifdef CONFIG_SECURITY - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); -#endif - return hint; -} - -/* - * Some inline functions in vmstat.h depend on page_zone() - */ -#include - 
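The block above packs zone, node and section identifiers into the top bits of page->flags with per-field shift and mask constants, leaving the low bits for ordinary page flags. The toy below reproduces the encode/decode arithmetic for two fields; the widths and the choice of fields are invented for illustration, whereas the kernel derives them from the configuration:

#include <stdio.h>

#define NODE_WIDTH  6
#define ZONE_WIDTH  2
#define BITS        ((int)(sizeof(unsigned long) * 8))

#define NODE_SHIFT  (BITS - NODE_WIDTH)            /* topmost field in this toy */
#define ZONE_SHIFT  (NODE_SHIFT - ZONE_WIDTH)      /* packed just below it */
#define NODE_MASK   ((1UL << NODE_WIDTH) - 1)
#define ZONE_MASK   ((1UL << ZONE_WIDTH) - 1)

static unsigned long set_links(unsigned long flags,
                               unsigned long zone, unsigned long node)
{
    flags &= ~(NODE_MASK << NODE_SHIFT);
    flags &= ~(ZONE_MASK << ZONE_SHIFT);
    flags |= (node & NODE_MASK) << NODE_SHIFT;
    flags |= (zone & ZONE_MASK) << ZONE_SHIFT;
    return flags;
}

int main(void)
{
    unsigned long flags = set_links(0x5UL, 2, 37);  /* ordinary flag bits stay low */
    printf("node=%lu zone=%lu low=%#lx\n",
           (flags >> NODE_SHIFT) & NODE_MASK,
           (flags >> ZONE_SHIFT) & ZONE_MASK,
           flags & 0xffUL);
    return 0;
}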
-static __always_inline void *lowmem_page_address(struct page *page) -{ - return __va(page_to_pfn(page) << PAGE_SHIFT); -} - -#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) -#define HASHED_PAGE_VIRTUAL -#endif - -#if defined(WANT_PAGE_VIRTUAL) -#define page_address(page) ((page)->virtual) -#define set_page_address(page, address) \ - do { \ - (page)->virtual = (address); \ - } while(0) -#define page_address_init() do { } while(0) -#endif - -#if defined(HASHED_PAGE_VIRTUAL) -void *page_address(struct page *page); -void set_page_address(struct page *page, void *virtual); -void page_address_init(void); -#endif - -#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) -#define page_address(page) lowmem_page_address(page) -#define set_page_address(page, address) do { } while(0) -#define page_address_init() do { } while(0) -#endif - -/* - * On an anonymous page mapped into a user virtual memory area, - * page->mapping points to its anon_vma, not to a struct address_space; - * with the PAGE_MAPPING_ANON bit set to distinguish it. - * - * Please note that, confusingly, "page_mapping" refers to the inode - * address_space which maps the page from disk; whereas "page_mapped" - * refers to user virtual address space into which the page is mapped. - */ -#define PAGE_MAPPING_ANON 1 - -extern struct address_space swapper_space; -static inline struct address_space *page_mapping(struct page *page) -{ - struct address_space *mapping = page->mapping; - - VM_BUG_ON(PageSlab(page)); -#ifdef CONFIG_SWAP - if (unlikely(PageSwapCache(page))) - mapping = &swapper_space; - else -#endif - if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) - mapping = NULL; - return mapping; -} - -static inline int PageAnon(struct page *page) -{ - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; -} - -/* - * Return the pagecache index of the passed page. Regular pagecache pages - * use ->index whereas swapcache pages use ->private - */ -static inline pgoff_t page_index(struct page *page) -{ - if (unlikely(PageSwapCache(page))) - return page_private(page); - return page->index; -} - -/* - * The atomic page->_mapcount, like _count, starts from -1: - * so that transitions both from it and to it can be tracked, - * using atomic_inc_and_test and atomic_add_negative(-1). - */ -static inline void reset_page_mapcount(struct page *page) -{ - atomic_set(&(page)->_mapcount, -1); -} - -static inline int page_mapcount(struct page *page) -{ - return atomic_read(&(page)->_mapcount) + 1; -} - -/* - * Return true if this page is mapped into pagetables. - */ -static inline int page_mapped(struct page *page) -{ - return atomic_read(&(page)->_mapcount) >= 0; -} - -/* - * Different kinds of faults, as returned by handle_mm_fault(). - * Used to decide whether a process gets delivered SIGBUS or - * just gets major/minor fault counters bumped up. - */ - -#define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. 
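page_mapping() above relies on pointer tagging: page->mapping normally points to an address_space, but for anonymous pages the low bit (PAGE_MAPPING_ANON) is set and the pointer actually refers to an anon_vma. A small userspace illustration of that tag-and-mask trick with stand-in types:

#include <stdio.h>
#include <stdint.h>

#define MAPPING_ANON 0x1UL        /* low bit marks an anonymous mapping in this toy */

struct address_space_model { int id; };
struct anon_vma_model      { int id; };

static void *tag_anon(struct anon_vma_model *av)
{
    return (void *)((uintptr_t)av | MAPPING_ANON);
}

static int mapping_is_anon(const void *mapping)
{
    return ((uintptr_t)mapping & MAPPING_ANON) != 0;
}

/* File-backed mapping pointer, or NULL if the tag says "anonymous". */
static struct address_space_model *file_mapping(void *mapping)
{
    return mapping_is_anon(mapping) ? NULL
                                    : (struct address_space_model *)mapping;
}

int main(void)
{
    struct address_space_model as = { 1 };
    struct anon_vma_model av = { 2 };

    void *file = &as;
    void *anon = tag_anon(&av);

    printf("file tagged anon? %d, anon tagged anon? %d, anon as file mapping: %p\n",
           mapping_is_anon(file), mapping_is_anon(anon),
           (void *)file_mapping(anon));
    return 0;
}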
*/ - -#define VM_FAULT_OOM 0x0001 -#define VM_FAULT_SIGBUS 0x0002 -#define VM_FAULT_MAJOR 0x0004 -#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ - -#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ -#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ - -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) - -#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) - -extern void show_free_areas(void); - -#ifdef CONFIG_SHMEM -int shmem_lock(struct file *file, int lock, struct user_struct *user); -#else -static inline int shmem_lock(struct file *file, int lock, - struct user_struct *user) -{ - return 0; -} -#endif -struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags); - -int shmem_zero_setup(struct vm_area_struct *); - -#ifndef CONFIG_MMU -extern unsigned long shmem_get_unmapped_area(struct file *file, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags); -#endif - -extern int can_do_mlock(void); -extern int user_shm_lock(size_t, struct user_struct *); -extern void user_shm_unlock(size_t, struct user_struct *); - -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct vm_area_struct *nonlinear_vma; /* Check page->index if set */ - struct address_space *check_mapping; /* Check page->mapping if set */ - pgoff_t first_index; /* Lowest page->index to unmap */ - pgoff_t last_index; /* Highest page->index to unmap */ - spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */ - unsigned long truncate_count; /* Compare vm_truncate_count */ -}; - -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte); - -int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, - unsigned long size); -unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size, struct zap_details *); -unsigned long unmap_vmas(struct mmu_gather **tlb, - struct vm_area_struct *start_vma, unsigned long start_addr, - unsigned long end_addr, unsigned long *nr_accounted, - struct zap_details *); - -/** - * mm_walk - callbacks for walk_page_range - * @pgd_entry: if set, called for each non-empty PGD (top-level) entry - * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry - * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry - * @pte_entry: if set, called for each non-empty PTE (4th-level) entry - * @pte_hole: if set, called for each hole at all levels - * - * (see walk_page_range for more details) - */ -struct mm_walk { - int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, struct mm_walk *); - int (*pud_entry)(pud_t *, unsigned long, unsigned long, struct mm_walk *); - int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *); - int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *); - int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *); - struct mm_struct *mm; - void *private; -}; - -int walk_page_range(unsigned long addr, unsigned long end, - struct mm_walk *walk); -void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, - unsigned long end, unsigned long floor, unsigned long ceiling); -int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma); -void unmap_mapping_range(struct address_space *mapping, - loff_t const holebegin, loff_t const holelen, int even_cows); -int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, - 
void *buf, int len, int write); - -static inline void unmap_shared_mapping_range(struct address_space *mapping, - loff_t const holebegin, loff_t const holelen) -{ - unmap_mapping_range(mapping, holebegin, holelen, 0); -} - -extern int vmtruncate(struct inode * inode, loff_t offset); -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); - -#ifdef CONFIG_MMU -extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, int write_access); -#else -static inline int handle_mm_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - int write_access) -{ - /* should never happen if there's no MMU */ - BUG(); - return VM_FAULT_SIGBUS; -} -#endif - -extern int make_pages_present(unsigned long addr, unsigned long end); -extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); - -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); - -extern int try_to_release_page(struct page * page, gfp_t gfp_mask); -extern void do_invalidatepage(struct page *page, unsigned long offset); - -int __set_page_dirty_nobuffers(struct page *page); -int __set_page_dirty_no_writeback(struct page *page); -int redirty_page_for_writepage(struct writeback_control *wbc, - struct page *page); -int set_page_dirty(struct page *page); -int set_page_dirty_lock(struct page *page); -int clear_page_dirty_for_io(struct page *page); - -extern unsigned long move_page_tables(struct vm_area_struct *vma, - unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len); -extern unsigned long do_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); -extern int mprotect_fixup(struct vm_area_struct *vma, - struct vm_area_struct **pprev, unsigned long start, - unsigned long end, unsigned long newflags); - -/* - * get_user_pages_fast provides equivalent functionality to get_user_pages, - * operating on current and current->mm (force=0 and doesn't return any vmas). - * - * get_user_pages_fast may take mmap_sem and page tables, so no assumptions - * can be made about locking. get_user_pages_fast is to be implemented in a - * way that is advantageous (vs get_user_pages()) when the user memory area is - * already faulted in and present in ptes. However if the pages have to be - * faulted in, it may turn out to be slightly slower). - */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages); - -/* - * A callback you can register to apply pressure to ageable caches. - * - * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should - * look through the least-recently-used 'nr_to_scan' entries and - * attempt to free them up. It should return the number of objects - * which remain in the cache. If it returns -1, it means it cannot do - * any scanning at this time (eg. there is a risk of deadlock). - * - * The 'gfpmask' refers to the allocation we are currently trying to - * fulfil. - * - * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is - * querying the cache size, so a fastpath for that case is appropriate. 
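The comment above spells out the shrinker callback contract: nr_to_scan == 0 is a size query, a positive value asks the cache to drop up to that many entries, and the return value is the number of objects left (or -1 when scanning is not possible). A toy callback honouring that contract, with invented names and a plain long standing in for the cache:

#include <stdio.h>

static long cached_objects = 100;

static int toy_shrink(int nr_to_scan, unsigned int gfp_mask)
{
    (void)gfp_mask;                       /* ignored in this toy */
    if (nr_to_scan > 0) {
        long freed = nr_to_scan < cached_objects ? nr_to_scan : cached_objects;
        cached_objects -= freed;
    }
    return (int)cached_objects;           /* objects still cached */
}

int main(void)
{
    printf("size query: %d\n", toy_shrink(0, 0));           /* 100 */
    printf("after scanning 30: %d\n", toy_shrink(30, 0));   /* 70 */
    return 0;
}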
- */ -struct shrinker { - int (*shrink)(int nr_to_scan, gfp_t gfp_mask); - int seeks; /* seeks to recreate an obj */ - - /* These are for internal use */ - struct list_head list; - long nr; /* objs pending delete */ -}; -#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -extern void register_shrinker(struct shrinker *); -extern void unregister_shrinker(struct shrinker *); - -int vma_wants_writenotify(struct vm_area_struct *vma); - -extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); - -#ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, - unsigned long address) -{ - return 0; -} -#else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); -#endif - -#ifdef __PAGETABLE_PMD_FOLDED -static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, - unsigned long address) -{ - return 0; -} -#else -int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); -#endif - -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); - -/* - * The following ifdef needed to get the 4level-fixup.h header to work. - * Remove it when 4level-fixup.h has been removed. - */ -#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); -} - -static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) -{ - return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? - NULL: pmd_offset(pud, address); -} -#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ - -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS -/* - * We tuck a spinlock to guard each pagetable page into its struct page, - * at page->private, with BUILD_BUG_ON to make sure that this will not - * overflow into the next struct page (as it might with DEBUG_SPINLOCK). - * When freeing, reset page->mapping so free_pages_check won't complain. - */ -#define __pte_lockptr(page) &((page)->ptl) -#define pte_lock_init(_page) do { \ - spin_lock_init(__pte_lockptr(_page)); \ -} while (0) -#define pte_lock_deinit(page) ((page)->mapping = NULL) -#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) -#else -/* - * We use mm->page_table_lock to guard all pagetable pages of the mm. - */ -#define pte_lock_init(page) do {} while (0) -#define pte_lock_deinit(page) do {} while (0) -#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) -#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ - -static inline void pgtable_page_ctor(struct page *page) -{ - pte_lock_init(page); - inc_zone_page_state(page, NR_PAGETABLE); -} - -static inline void pgtable_page_dtor(struct page *page) -{ - pte_lock_deinit(page); - dec_zone_page_state(page, NR_PAGETABLE); -} - -#define pte_offset_map_lock(mm, pmd, address, ptlp) \ -({ \ - spinlock_t *__ptl = pte_lockptr(mm, pmd); \ - pte_t *__pte = pte_offset_map(pmd, address); \ - *(ptlp) = __ptl; \ - spin_lock(__ptl); \ - __pte; \ -}) - -#define pte_unmap_unlock(pte, ptl) do { \ - spin_unlock(ptl); \ - pte_unmap(pte); \ -} while (0) - -#define pte_alloc_map(mm, pmd, address) \ - ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? 
\ - NULL: pte_offset_map(pmd, address)) - -#define pte_alloc_map_lock(mm, pmd, address, ptlp) \ - ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ - NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) - -#define pte_alloc_kernel(pmd, address) \ - ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ - NULL: pte_offset_kernel(pmd, address)) - -extern void free_area_init(unsigned long * zones_size); -extern void free_area_init_node(int nid, unsigned long * zones_size, - unsigned long zone_start_pfn, unsigned long *zholes_size); -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP -/* - * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its - * zones, allocate the backing mem_map and account for memory holes in a more - * architecture independent manner. This is a substitute for creating the - * zone_sizes[] and zholes_size[] arrays and passing them to - * free_area_init_node() - * - * An architecture is expected to register range of page frames backed by - * physical memory with add_active_range() before calling - * free_area_init_nodes() passing in the PFN each zone ends at. At a basic - * usage, an architecture is expected to do something like - * - * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, - * max_highmem_pfn}; - * for_each_valid_physical_page_range() - * add_active_range(node_id, start_pfn, end_pfn) - * free_area_init_nodes(max_zone_pfns); - * - * If the architecture guarantees that there are no holes in the ranges - * registered with add_active_range(), free_bootmem_active_regions() - * will call free_bootmem_node() for each registered physical page range. - * Similarly sparse_memory_present_with_active_regions() calls - * memory_present() for each range when SPARSEMEM is enabled. 
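pud_alloc, pmd_alloc and the pte_alloc_* macros above all use the same allocate-if-missing idiom: if the upper-level entry is empty, try to populate it, and return NULL only when that population fails; otherwise return a pointer offset into the (now present) lower table. A userspace model of the idiom with two invented levels:

#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 4

struct lower { long slot[ENTRIES]; };
struct upper { struct lower *entry[ENTRIES]; };

/* Plays the role of __pud_alloc()/__pte_alloc(): 0 on success, nonzero on failure. */
static int alloc_lower(struct upper *u, int idx)
{
    u->entry[idx] = calloc(1, sizeof(*u->entry[idx]));
    return u->entry[idx] == NULL;
}

/* Plays the role of pte_alloc_kernel(): populate if missing, then offset in. */
static long *slot_alloc(struct upper *u, int idx, int slot)
{
    return (u->entry[idx] == NULL && alloc_lower(u, idx))
               ? NULL : &u->entry[idx]->slot[slot];
}

int main(void)
{
    struct upper top = { { NULL } };
    long *p = slot_alloc(&top, 1, 2);
    if (p) {
        *p = 42;
        printf("slot value: %ld\n", *p);
    }
    free(top.entry[1]);
    return 0;
}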
- * - * See mm/page_alloc.c for more information on each function exposed by - * CONFIG_ARCH_POPULATES_NODE_MAP - */ -extern void free_area_init_nodes(unsigned long *max_zone_pfn); -extern void add_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern void remove_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern void remove_all_active_ranges(void); -extern unsigned long absent_pages_in_range(unsigned long start_pfn, - unsigned long end_pfn); -extern void get_pfn_range_for_nid(unsigned int nid, - unsigned long *start_pfn, unsigned long *end_pfn); -extern unsigned long find_min_pfn_with_active_regions(void); -extern void free_bootmem_with_active_regions(int nid, - unsigned long max_low_pfn); -typedef int (*work_fn_t)(unsigned long, unsigned long, void *); -extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); -extern void sparse_memory_present_with_active_regions(int nid); -#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ -extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, - unsigned long, enum memmap_context); -extern void setup_per_zone_pages_min(void); -extern void mem_init(void); -extern void show_mem(void); -extern void si_meminfo(struct sysinfo * val); -extern void si_meminfo_node(struct sysinfo *val, int nid); -extern int after_bootmem; - -#ifdef CONFIG_NUMA -extern void setup_per_cpu_pageset(void); -#else -static inline void setup_per_cpu_pageset(void) {} -#endif - -/* prio_tree.c */ -void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); -void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); -void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *); -struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, - struct prio_tree_iter *iter); - -#define vma_prio_tree_foreach(vma, iter, root, begin, end) \ - for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \ - (vma = vma_prio_tree_next(vma, iter)); ) - -static inline void vma_nonlinear_insert(struct vm_area_struct *vma, - struct list_head *list) -{ - vma->shared.vm_set.parent = NULL; - list_add_tail(&vma->shared.vm_set.list, list); -} - -/* mmap.c */ -extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); -extern struct vm_area_struct *vma_merge(struct mm_struct *, - struct vm_area_struct *prev, unsigned long addr, unsigned long end, - unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *); -extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); -extern int split_vma(struct mm_struct *, - struct vm_area_struct *, unsigned long addr, int new_below); -extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, - struct rb_node **, struct rb_node *); -extern void unlink_file_vma(struct vm_area_struct *); -extern struct vm_area_struct *copy_vma(struct vm_area_struct **, - unsigned long addr, unsigned long len, pgoff_t pgoff); -extern void 
exit_mmap(struct mm_struct *); - -extern int mm_take_all_locks(struct mm_struct *mm); -extern void mm_drop_all_locks(struct mm_struct *mm); - -#ifdef CONFIG_PROC_FS -/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ -extern void added_exe_file_vma(struct mm_struct *mm); -extern void removed_exe_file_vma(struct mm_struct *mm); -#else -static inline void added_exe_file_vma(struct mm_struct *mm) -{} - -static inline void removed_exe_file_vma(struct mm_struct *mm) -{} -#endif /* CONFIG_PROC_FS */ - -extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); -extern int install_special_mapping(struct mm_struct *mm, - unsigned long addr, unsigned long len, - unsigned long flags, struct page **pages); - -extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - -extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long pgoff); -extern unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, unsigned long flags, - unsigned int vm_flags, unsigned long pgoff, - int accountable); - -static inline unsigned long do_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret = -EINVAL; - if ((offset + PAGE_ALIGN(len)) < offset) - goto out; - if (!(offset & ~PAGE_MASK)) - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -out: - return ret; -} - -extern int do_munmap(struct mm_struct *, unsigned long, size_t); - -extern unsigned long do_brk(unsigned long, unsigned long); - -/* filemap.c */ -extern unsigned long page_unuse(struct page *); -extern void truncate_inode_pages(struct address_space *, loff_t); -extern void truncate_inode_pages_range(struct address_space *, - loff_t lstart, loff_t lend); - -/* generic vm_area_ops exported for stackable file systems */ -extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); - -/* mm/page-writeback.c */ -int write_one_page(struct page *page, int wait); - -/* readahead.c */ -#define VM_MAX_READAHEAD 128 /* kbytes */ -#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ - -int do_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read); -int force_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read); - -void page_cache_sync_readahead(struct address_space *mapping, - struct file_ra_state *ra, - struct file *filp, - pgoff_t offset, - unsigned long size); - -void page_cache_async_readahead(struct address_space *mapping, - struct file_ra_state *ra, - struct file *filp, - struct page *pg, - pgoff_t offset, - unsigned long size); - -unsigned long max_sane_readahead(unsigned long nr); - -/* Do stack extension */ -extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -#ifdef CONFIG_IA64 -extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); -#endif -extern int expand_stack_downwards(struct vm_area_struct *vma, - unsigned long address); - -/* Look up the first VMA which satisfies addr < vm_end, NULL if none. 
*/ -extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); -extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, - struct vm_area_struct **pprev); - -/* Look up the first VMA which intersects the interval start_addr..end_addr-1, - NULL if none. Assume start_addr < end_addr. */ -static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) -{ - struct vm_area_struct * vma = find_vma(mm,start_addr); - - if (vma && end_addr <= vma->vm_start) - vma = NULL; - return vma; -} - -static inline unsigned long vma_pages(struct vm_area_struct *vma) -{ - return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; -} - -pgprot_t vm_get_page_prot(unsigned long vm_flags); -struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); -int remap_pfn_range(struct vm_area_struct *, unsigned long addr, - unsigned long pfn, unsigned long size, pgprot_t); -int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); -int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn); -int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn); - -struct page *follow_page(struct vm_area_struct *, unsigned long address, - unsigned int foll_flags); -#define FOLL_WRITE 0x01 /* check pte is writable */ -#define FOLL_TOUCH 0x02 /* mark page accessed */ -#define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ - -typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, - void *data); -extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, - unsigned long size, pte_fn_t fn, void *data); - -#ifdef CONFIG_PROC_FS -void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); -#else -static inline void vm_stat_account(struct mm_struct *mm, - unsigned long flags, struct file *file, long pages) -{ -} -#endif /* CONFIG_PROC_FS */ - -#ifdef CONFIG_DEBUG_PAGEALLOC -extern int debug_pagealloc_enabled; - -extern void kernel_map_pages(struct page *page, int numpages, int enable); - -static inline void enable_debug_pagealloc(void) -{ - debug_pagealloc_enabled = 1; -} -#ifdef CONFIG_HIBERNATION -extern bool kernel_page_present(struct page *page); -#endif /* CONFIG_HIBERNATION */ -#else -static inline void -kernel_map_pages(struct page *page, int numpages, int enable) {} -static inline void enable_debug_pagealloc(void) -{ -} -#ifdef CONFIG_HIBERNATION -static inline bool kernel_page_present(struct page *page) { return true; } -#endif /* CONFIG_HIBERNATION */ -#endif - -extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); -#ifdef __HAVE_ARCH_GATE_AREA -int in_gate_area_no_task(unsigned long addr); -int in_gate_area(struct task_struct *task, unsigned long addr); -#else -int in_gate_area_no_task(unsigned long addr); -#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) -#endif /* __HAVE_ARCH_GATE_AREA */ - -int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages); - -#ifndef CONFIG_MMU -#define randomize_va_space 0 -#else -extern int randomize_va_space; -#endif - -const char * arch_vma_name(struct vm_area_struct *vma); -void print_vma_addr(char *prefix, unsigned long rip); - -struct page *sparse_mem_map_populate(unsigned long pnum, int nid); 
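find_vma_intersection() above builds on find_vma(): take the first region whose end lies beyond start_addr, then reject it if it begins at or after end_addr. The stand-alone sketch below applies the same test to an ordinary sorted array of half-open ranges, with a linear scan standing in for the VMA lookup:

#include <stdio.h>

struct range { unsigned long start, end; };   /* half-open [start, end) */

static const struct range *first_ending_after(const struct range *r, int n,
                                              unsigned long addr)
{
    for (int i = 0; i < n; i++)
        if (addr < r[i].end)
            return &r[i];                     /* ranges assumed sorted, like VMAs */
    return NULL;
}

static const struct range *intersection(const struct range *r, int n,
                                        unsigned long start, unsigned long end)
{
    const struct range *v = first_ending_after(r, n, start);
    if (v && end <= v->start)                 /* candidate begins after the query ends */
        v = NULL;
    return v;
}

int main(void)
{
    struct range vmas[] = { { 0x1000, 0x3000 }, { 0x8000, 0x9000 } };
    printf("%p\n", (void *)intersection(vmas, 2, 0x4000, 0x7000)); /* NULL: falls in the gap */
    printf("%p\n", (void *)intersection(vmas, 2, 0x2000, 0x5000)); /* overlaps the first range */
    return 0;
}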
-pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); -pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); -pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); -void *vmemmap_alloc_block(unsigned long size, int node); -void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); -int vmemmap_populate_basepages(struct page *start_page, - unsigned long pages, int node); -int vmemmap_populate(struct page *start_page, unsigned long pages, int node); -void vmemmap_populate_print_last(void); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_MM_H */ diff -Nurb linux-2.6.27-720/include/linux/netdevice.h linux-2.6.27-710/include/linux/netdevice.h --- linux-2.6.27-720/include/linux/netdevice.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/netdevice.h 2009-05-04 12:16:04.000000000 -0400 @@ -735,46 +735,6 @@ /* GARP */ struct garp_port *garp_port; - /* Click polling support */ - /* - * polling is < 0 if the device does not support polling, == 0 if the - * device supports polling but interrupts are on, and > 0 if polling - * is on. - */ - int polling; - int (*poll_on)(struct net_device *); - int (*poll_off)(struct net_device *); - /* - * rx_poll returns to caller a linked list of sk_buff objects received - * by the device. on call, the want argument specifies the number of - * packets wanted. on return, the want argument specifies the number - * of packets actually returned. - */ - struct sk_buff * (*rx_poll)(struct net_device*, int *want); - /* refill rx dma ring using the given sk_buff list. returns 0 if - * successful, or if there are more entries need to be cleaned, - * returns the number of dirty entries. the ptr to the sk_buff list is - * updated by the driver to point to any unused skbs. - */ - int (*rx_refill)(struct net_device*, struct sk_buff**); - /* - * place sk_buff on the transmit ring. returns 0 if successful, 1 - * otherwise - */ - int (*tx_queue)(struct net_device *, struct sk_buff *); - /* - * clean tx dma ring. returns the list of skb objects cleaned - */ - struct sk_buff* (*tx_clean)(struct net_device *); - /* - * start transmission. 
returns 0 if successful, 1 otherwise - */ - int (*tx_start)(struct net_device *); - /* - * tell device the end of a batch of packets - */ - int (*tx_eob)(struct net_device *); - /* class/net/name entry */ struct device dev; /* space for optional statistics and wireless sysfs groups */ @@ -959,11 +919,6 @@ extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); extern void unregister_netdevice(struct net_device *dev); - -extern int register_net_in(struct notifier_block *nb); /* Click */ -extern int unregister_net_in(struct notifier_block *nb); /* Click */ -extern int ptype_dispatch(struct sk_buff *skb, unsigned short type); /* Click */ - extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); @@ -1262,10 +1217,7 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 -//extern int netif_receive_skb(struct sk_buff *skb); -#define HAVE___NETIF_RECEIVE_SKB 1 -extern int __netif_receive_skb(struct sk_buff *skb, unsigned short protocol, int ignore_notifiers); - +extern int netif_receive_skb(struct sk_buff *skb); extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); @@ -1406,11 +1358,6 @@ extern void netif_device_attach(struct net_device *dev); -static inline int netif_receive_skb(struct sk_buff *skb) -{ - return __netif_receive_skb(skb, skb->protocol, 0); -} - /* * Network interface message level settings */ diff -Nurb linux-2.6.27-720/include/linux/netdevice.h.orig linux-2.6.27-710/include/linux/netdevice.h.orig --- linux-2.6.27-720/include/linux/netdevice.h.orig 2009-05-04 12:16:04.000000000 -0400 +++ linux-2.6.27-710/include/linux/netdevice.h.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,1732 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the Interfaces handler. - * - * Version: @(#)dev.h 1.0.10 08/12/93 - * - * Authors: Ross Biro - * Fred N. van Kempen, - * Corey Minyard - * Donald J. Becker, - * Alan Cox, - * Bjorn Ekwall. - * Pekka Riikonen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Moved to /usr/include/linux for NET3 - */ -#ifndef _LINUX_NETDEVICE_H -#define _LINUX_NETDEVICE_H - -#include -#include -#include - -#ifdef __KERNEL__ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -struct vlan_group; -struct ethtool_ops; -struct netpoll_info; -/* 802.11 specific */ -struct wireless_dev; - /* source back-compat hooks */ -#define SET_ETHTOOL_OPS(netdev,ops) \ - ( (netdev)->ethtool_ops = (ops) ) - -#define HAVE_ALLOC_NETDEV /* feature macro: alloc_xxxdev - functions are available. 
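The hunk above removes the Click polling hooks from struct net_device: poll_on/poll_off, rx_poll returning a batch of packets, rx_refill handing buffers back, and the tx_queue/tx_clean/tx_start/tx_eob transmit path. The sketch below models just the receive side of such a hook set as a struct of function pointers; every name and type is a stand-in, not the kernel's or Click's own interface:

#include <stdio.h>
#include <stddef.h>

struct pkt { struct pkt *next; };

struct polldev {
    int backlog;                                   /* packets waiting in "hardware" */
    struct pkt *(*rx_poll)(struct polldev *, int *want);
    int (*rx_refill)(struct polldev *, int n);
};

static struct pkt pool[8];

/* Hand back up to *want packets as a linked list; report how many were taken. */
static struct pkt *toy_rx_poll(struct polldev *d, int *want)
{
    int n = (*want < d->backlog) ? *want : d->backlog;
    struct pkt *head = NULL;
    for (int i = 0; i < n; i++) { pool[i].next = head; head = &pool[i]; }
    d->backlog -= n;
    *want = n;
    return head;
}

static int toy_rx_refill(struct polldev *d, int n) { (void)d; (void)n; return 0; }

int main(void)
{
    struct polldev dev = { .backlog = 5, .rx_poll = toy_rx_poll,
                           .rx_refill = toy_rx_refill };
    int want = 3;
    struct pkt *batch = dev.rx_poll(&dev, &want);
    int got = 0;
    for (struct pkt *p = batch; p; p = p->next) got++;
    dev.rx_refill(&dev, got);
    printf("asked for 3, got %d, %d still pending\n", got, dev.backlog);
    return 0;
}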
*/ -#define HAVE_FREE_NETDEV /* free_netdev() */ -#define HAVE_NETDEV_PRIV /* netdev_priv() */ - -#define NET_XMIT_SUCCESS 0 -#define NET_XMIT_DROP 1 /* skb dropped */ -#define NET_XMIT_CN 2 /* congestion notification */ -#define NET_XMIT_POLICED 3 /* skb is shot by police */ -#define NET_XMIT_MASK 0xFFFF /* qdisc flags in net/sch_generic.h */ - -/* Backlog congestion levels */ -#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ -#define NET_RX_DROP 1 /* packet dropped */ -#define NET_RX_CN_LOW 2 /* storm alert, just in case */ -#define NET_RX_CN_MOD 3 /* Storm on its way! */ -#define NET_RX_CN_HIGH 4 /* The storm is here */ -#define NET_RX_BAD 5 /* packet dropped due to kernel error */ - -/* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It - * indicates that the device will soon be dropping packets, or already drops - * some packets of the same priority; prompting us to send less aggressively. */ -#define net_xmit_eval(e) ((e) == NET_XMIT_CN? 0 : (e)) -#define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) - -#endif - -#define MAX_ADDR_LEN 32 /* Largest hardware address length */ - -/* Driver transmit return codes */ -#define NETDEV_TX_OK 0 /* driver took care of packet */ -#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ -#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ - -#ifdef __KERNEL__ - -/* - * Compute the worst case header length according to the protocols - * used. - */ - -#if defined(CONFIG_WLAN_80211) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) -# if defined(CONFIG_MAC80211_MESH) -# define LL_MAX_HEADER 128 -# else -# define LL_MAX_HEADER 96 -# endif -#elif defined(CONFIG_TR) -# define LL_MAX_HEADER 48 -#else -# define LL_MAX_HEADER 32 -#endif - -#if !defined(CONFIG_NET_IPIP) && !defined(CONFIG_NET_IPIP_MODULE) && \ - !defined(CONFIG_NET_IPGRE) && !defined(CONFIG_NET_IPGRE_MODULE) && \ - !defined(CONFIG_IPV6_SIT) && !defined(CONFIG_IPV6_SIT_MODULE) && \ - !defined(CONFIG_IPV6_TUNNEL) && !defined(CONFIG_IPV6_TUNNEL_MODULE) -#define MAX_HEADER LL_MAX_HEADER -#else -#define MAX_HEADER (LL_MAX_HEADER + 48) -#endif - -#endif /* __KERNEL__ */ - -/* - * Network device statistics. Akin to the 2.0 ether stats but - * with byte counters. - */ - -struct net_device_stats -{ - unsigned long rx_packets; /* total packets received */ - unsigned long tx_packets; /* total packets transmitted */ - unsigned long rx_bytes; /* total bytes received */ - unsigned long tx_bytes; /* total bytes transmitted */ - unsigned long rx_errors; /* bad packets received */ - unsigned long tx_errors; /* packet transmit problems */ - unsigned long rx_dropped; /* no space in linux buffers */ - unsigned long tx_dropped; /* no space available in linux */ - unsigned long multicast; /* multicast packets received */ - unsigned long collisions; - - /* detailed rx_errors: */ - unsigned long rx_length_errors; - unsigned long rx_over_errors; /* receiver ring buff overflow */ - unsigned long rx_crc_errors; /* recved pkt with crc error */ - unsigned long rx_frame_errors; /* recv'd frame alignment error */ - unsigned long rx_fifo_errors; /* recv'r fifo overrun */ - unsigned long rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - unsigned long tx_aborted_errors; - unsigned long tx_carrier_errors; - unsigned long tx_fifo_errors; - unsigned long tx_heartbeat_errors; - unsigned long tx_window_errors; - - /* for cslip etc */ - unsigned long rx_compressed; - unsigned long tx_compressed; -}; - - -/* Media selection options. 
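HH_DATA_ALIGN and the LL_*_SPACE macros above round a header length up to a multiple of HH_DATA_MOD with the usual add-then-mask trick for power-of-two alignments. The same arithmetic in isolation, with an invented constant:

#include <stdio.h>

#define ALIGN_MOD 16
#define ALIGN_UP(len) (((len) + (ALIGN_MOD - 1)) & ~(ALIGN_MOD - 1))

int main(void)
{
    printf("%d %d %d\n", ALIGN_UP(1), ALIGN_UP(16), ALIGN_UP(17)); /* 16 16 32 */
    return 0;
}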
*/ -enum { - IF_PORT_UNKNOWN = 0, - IF_PORT_10BASE2, - IF_PORT_10BASET, - IF_PORT_AUI, - IF_PORT_100BASET, - IF_PORT_100BASETX, - IF_PORT_100BASEFX -}; - -#ifdef __KERNEL__ - -#include -#include - -struct neighbour; -struct neigh_parms; -struct sk_buff; - -struct netif_rx_stats -{ - unsigned total; - unsigned dropped; - unsigned time_squeeze; - unsigned cpu_collision; -}; - -DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat); - -struct dev_addr_list -{ - struct dev_addr_list *next; - u8 da_addr[MAX_ADDR_LEN]; - u8 da_addrlen; - u8 da_synced; - int da_users; - int da_gusers; -}; - -/* - * We tag multicasts with these structures. - */ - -#define dev_mc_list dev_addr_list -#define dmi_addr da_addr -#define dmi_addrlen da_addrlen -#define dmi_users da_users -#define dmi_gusers da_gusers - -struct hh_cache -{ - struct hh_cache *hh_next; /* Next entry */ - atomic_t hh_refcnt; /* number of users */ -/* - * We want hh_output, hh_len, hh_lock and hh_data be a in a separate - * cache line on SMP. - * They are mostly read, but hh_refcnt may be changed quite frequently, - * incurring cache line ping pongs. - */ - __be16 hh_type ____cacheline_aligned_in_smp; - /* protocol identifier, f.e ETH_P_IP - * NOTE: For VLANs, this will be the - * encapuslated type. --BLG - */ - u16 hh_len; /* length of header */ - int (*hh_output)(struct sk_buff *skb); - seqlock_t hh_lock; - - /* cached hardware header; allow for machine alignment needs. */ -#define HH_DATA_MOD 16 -#define HH_DATA_OFF(__len) \ - (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1)) -#define HH_DATA_ALIGN(__len) \ - (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1)) - unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; -}; - -/* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. - * Alternative is: - * dev->hard_header_len ? (dev->hard_header_len + - * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 - * - * We could use other alignment values, but we must maintain the - * relationship HH alignment <= LL alignment. - * - * LL_ALLOCATED_SPACE also takes into account the tailroom the device - * may need. - */ -#define LL_RESERVED_SPACE(dev) \ - ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) -#define LL_RESERVED_SPACE_EXTRA(dev,extra) \ - ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) -#define LL_ALLOCATED_SPACE(dev) \ - ((((dev)->hard_header_len+(dev)->needed_headroom+(dev)->needed_tailroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) - -struct header_ops { - int (*create) (struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned len); - int (*parse)(const struct sk_buff *skb, unsigned char *haddr); - int (*rebuild)(struct sk_buff *skb); -#define HAVE_HEADER_CACHE - int (*cache)(const struct neighbour *neigh, struct hh_cache *hh); - void (*cache_update)(struct hh_cache *hh, - const struct net_device *dev, - const unsigned char *haddr); -}; - -/* These flag bits are private to the generic network queueing - * layer, they may not be explicitly referenced by any other - * code. - */ - -enum netdev_state_t -{ - __LINK_STATE_START, - __LINK_STATE_PRESENT, - __LINK_STATE_NOCARRIER, - __LINK_STATE_LINKWATCH_PENDING, - __LINK_STATE_DORMANT, -}; - - -/* - * This structure holds at boot time configured netdevice settings. They - * are then used in the device probing. 
- */ -struct netdev_boot_setup { - char name[IFNAMSIZ]; - struct ifmap map; -}; -#define NETDEV_BOOT_SETUP_MAX 8 - -extern int __init netdev_boot_setup(char *str); - -/* - * Structure for NAPI scheduling similar to tasklet but with weighting - */ -struct napi_struct { - /* The poll_list must only be managed by the entity which - * changes the state of the NAPI_STATE_SCHED bit. This means - * whoever atomically sets that bit can add this napi_struct - * to the per-cpu poll_list, and whoever clears that bit - * can remove from the list right before clearing the bit. - */ - struct list_head poll_list; - - unsigned long state; - int weight; - int (*poll)(struct napi_struct *, int); -#ifdef CONFIG_NETPOLL - spinlock_t poll_lock; - int poll_owner; - struct net_device *dev; - struct list_head dev_list; -#endif -}; - -enum -{ - NAPI_STATE_SCHED, /* Poll is scheduled */ - NAPI_STATE_DISABLE, /* Disable pending */ -}; - -extern void __napi_schedule(struct napi_struct *n); - -static inline int napi_disable_pending(struct napi_struct *n) -{ - return test_bit(NAPI_STATE_DISABLE, &n->state); -} - -/** - * napi_schedule_prep - check if napi can be scheduled - * @n: napi context - * - * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable - * insure only one NAPI poll instance runs. We also make - * sure there is no pending NAPI disable. - */ -static inline int napi_schedule_prep(struct napi_struct *n) -{ - return !napi_disable_pending(n) && - !test_and_set_bit(NAPI_STATE_SCHED, &n->state); -} - -/** - * napi_schedule - schedule NAPI poll - * @n: napi context - * - * Schedule NAPI poll routine to be called if it is not already - * running. - */ -static inline void napi_schedule(struct napi_struct *n) -{ - if (napi_schedule_prep(n)) - __napi_schedule(n); -} - -/* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ -static inline int napi_reschedule(struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __napi_schedule(napi); - return 1; - } - return 0; -} - -/** - * napi_complete - NAPI processing complete - * @n: napi context - * - * Mark NAPI processing as complete. - */ -static inline void __napi_complete(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - list_del(&n->poll_list); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} - -static inline void napi_complete(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); -} - -/** - * napi_disable - prevent NAPI from scheduling - * @n: napi context - * - * Stop NAPI from being scheduled on this context. - * Waits till any outstanding processing completes. - */ -static inline void napi_disable(struct napi_struct *n) -{ - set_bit(NAPI_STATE_DISABLE, &n->state); - while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) - msleep(1); - clear_bit(NAPI_STATE_DISABLE, &n->state); -} - -/** - * napi_enable - enable NAPI scheduling - * @n: napi context - * - * Resume NAPI from being scheduled on this context. - * Must be paired with napi_disable. - */ -static inline void napi_enable(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} - -#ifdef CONFIG_SMP -/** - * napi_synchronize - wait until NAPI is not running - * @n: napi context - * - * Wait until NAPI is done being scheduled on this context. 
- * Waits till any outstanding processing completes but - * does not disable future activations. - */ -static inline void napi_synchronize(const struct napi_struct *n) -{ - while (test_bit(NAPI_STATE_SCHED, &n->state)) - msleep(1); -} -#else -# define napi_synchronize(n) barrier() -#endif - -enum netdev_queue_state_t -{ - __QUEUE_STATE_XOFF, - __QUEUE_STATE_FROZEN, -}; - -struct netdev_queue { - struct net_device *dev; - struct Qdisc *qdisc; - unsigned long state; - spinlock_t _xmit_lock; - int xmit_lock_owner; - struct Qdisc *qdisc_sleeping; -} ____cacheline_aligned_in_smp; - -/* - * The DEVICE structure. - * Actually, this whole structure is a big mistake. It mixes I/O - * data with strictly "high-level" data, and it has to know about - * almost every data structure used in the INET module. - * - * FIXME: cleanup struct net_device such that network protocol info - * moves out. - */ - -struct net_device -{ - - /* - * This is the first field of the "visible" part of this structure - * (i.e. as seen by users in the "Space.c" file). It is the name - * the interface. - */ - char name[IFNAMSIZ]; - /* device name hash chain */ - struct hlist_node name_hlist; - - /* - * I/O specific fields - * FIXME: Merge these and struct ifmap into one - */ - unsigned long mem_end; /* shared mem end */ - unsigned long mem_start; /* shared mem start */ - unsigned long base_addr; /* device I/O address */ - unsigned int irq; /* device IRQ number */ - - /* - * Some hardware also needs these fields, but they are not - * part of the usual set specified in Space.c. - */ - - unsigned char if_port; /* Selectable AUI, TP,..*/ - unsigned char dma; /* DMA channel */ - - unsigned long state; - - struct list_head dev_list; -#ifdef CONFIG_NETPOLL - struct list_head napi_list; -#endif - - /* The device initialization function. Called only once. */ - int (*init)(struct net_device *dev); - - /* ------- Fields preinitialized in Space.c finish here ------- */ - - /* Net device features */ - unsigned long features; -#define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ -#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ -#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ -#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ -#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ -#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ -#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ -#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_GSO 2048 /* Enable software GSO. */ -#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ - /* do not use LLTX in new drivers */ -#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ -#define NETIF_F_LRO 32768 /* large receive offload */ - - /* Segmentation offload features */ -#define NETIF_F_GSO_SHIFT 16 -#define NETIF_F_GSO_MASK 0xffff0000 -#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) -#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT) -#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) -#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT) -#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) - - /* List of features with software fallbacks. 
*/ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) - - -#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) -#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) -#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) - - /* Interface index. Unique device identifier */ - int ifindex; - int iflink; - - - struct net_device_stats* (*get_stats)(struct net_device *dev); - struct net_device_stats stats; - -#ifdef CONFIG_WIRELESS_EXT - /* List of functions to handle Wireless Extensions (instead of ioctl). - * See for details. Jean II */ - const struct iw_handler_def * wireless_handlers; - /* Instance data managed by the core of Wireless Extensions. */ - struct iw_public_data * wireless_data; -#endif - const struct ethtool_ops *ethtool_ops; - - /* Hardware header description */ - const struct header_ops *header_ops; - - /* - * This marks the end of the "visible" part of the structure. All - * fields hereafter are internal to the system, and may change at - * will (read: may be cleaned up at will). - */ - - - unsigned int flags; /* interface flags (a la BSD) */ - unsigned short gflags; - unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ - unsigned short padded; /* How much padding added by alloc_netdev() */ - - unsigned char operstate; /* RFC2863 operstate */ - unsigned char link_mode; /* mapping policy to operstate */ - - unsigned mtu; /* interface MTU value */ - unsigned short type; /* interface hardware type */ - unsigned short hard_header_len; /* hardware hdr length */ - - /* extra head- and tailroom the hardware may need, but not in all cases - * can this be guaranteed, especially tailroom. Some cases also use - * LL_MAX_HEADER instead to allocate the skb. - */ - unsigned short needed_headroom; - unsigned short needed_tailroom; - - struct net_device *master; /* Pointer to master device of a group, - * which this device is member of. - */ - - /* Interface address info. 
*/ - unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ - unsigned char addr_len; /* hardware address length */ - unsigned short dev_id; /* for shared network cards */ - - spinlock_t addr_list_lock; - struct dev_addr_list *uc_list; /* Secondary unicast mac addresses */ - int uc_count; /* Number of installed ucasts */ - int uc_promisc; - struct dev_addr_list *mc_list; /* Multicast mac addresses */ - int mc_count; /* Number of installed mcasts */ - unsigned int promiscuity; - unsigned int allmulti; - - - /* Protocol specific pointers */ - - void *atalk_ptr; /* AppleTalk link */ - void *ip_ptr; /* IPv4 specific data */ - void *dn_ptr; /* DECnet specific data */ - void *ip6_ptr; /* IPv6 specific data */ - void *ec_ptr; /* Econet specific data */ - void *ax25_ptr; /* AX.25 specific data */ - struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, - assign before registering */ - -/* - * Cache line mostly used on receive path (including eth_type_trans()) - */ - unsigned long last_rx; /* Time of last Rx */ - /* Interface address info used in eth_type_trans() */ - unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast - because most packets are unicast) */ - - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - - struct netdev_queue rx_queue; - - struct netdev_queue *_tx ____cacheline_aligned_in_smp; - - /* Number of TX queues allocated at alloc_netdev_mq() time */ - unsigned int num_tx_queues; - - /* Number of TX queues currently active in device */ - unsigned int real_num_tx_queues; - - unsigned long tx_queue_len; /* Max frames per queue allowed */ - spinlock_t tx_global_lock; -/* - * One part is mostly used on xmit path (device) - */ - void *priv; /* pointer to private data */ - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); - /* These may be needed for future network-power-down code. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ - - int watchdog_timeo; /* used by dev_watchdog() */ - struct timer_list watchdog_timer; - -/* - * refcnt is a very hot point, so align it on SMP - */ - /* Number of references to this device */ - atomic_t refcnt ____cacheline_aligned_in_smp; - - /* delayed register/unregister */ - struct list_head todo_list; - /* device index hash chain */ - struct hlist_node index_hlist; - - struct net_device *link_watch_next; - - /* register/unregister state machine */ - enum { NETREG_UNINITIALIZED=0, - NETREG_REGISTERED, /* completed register_netdevice */ - NETREG_UNREGISTERING, /* called unregister_netdevice */ - NETREG_UNREGISTERED, /* completed unregister todo */ - NETREG_RELEASED, /* called free_netdev */ - } reg_state; - - /* Called after device is detached from network. */ - void (*uninit)(struct net_device *dev); - /* Called after last user reference disappears. */ - void (*destructor)(struct net_device *dev); - - /* Pointers to interface service routines. 
*/ - int (*open)(struct net_device *dev); - int (*stop)(struct net_device *dev); -#define HAVE_NETDEV_POLL -#define HAVE_CHANGE_RX_FLAGS - void (*change_rx_flags)(struct net_device *dev, - int flags); -#define HAVE_SET_RX_MODE - void (*set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST - void (*set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR - int (*set_mac_address)(struct net_device *dev, - void *addr); -#define HAVE_VALIDATE_ADDR - int (*validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL - int (*do_ioctl)(struct net_device *dev, - struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG - int (*set_config)(struct net_device *dev, - struct ifmap *map); -#define HAVE_CHANGE_MTU - int (*change_mtu)(struct net_device *dev, int new_mtu); - -#define HAVE_TX_TIMEOUT - void (*tx_timeout) (struct net_device *dev); - - void (*vlan_rx_register)(struct net_device *dev, - struct vlan_group *grp); - void (*vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); - void (*vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); - - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); -#ifdef CONFIG_NETPOLL - struct netpoll_info *npinfo; -#endif -#ifdef CONFIG_NET_POLL_CONTROLLER - void (*poll_controller)(struct net_device *dev); -#endif - - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); - -#ifdef CONFIG_NET_NS - /* Network namespace this network device is inside */ - struct net *nd_net; -#endif - - /* mid-layer private */ - void *ml_priv; - - /* bridge stuff */ - struct net_bridge_port *br_port; - /* macvlan */ - struct macvlan_port *macvlan_port; - /* GARP */ - struct garp_port *garp_port; - - /* class/net/name entry */ - struct device dev; - /* space for optional statistics and wireless sysfs groups */ - struct attribute_group *sysfs_groups[3]; - - /* rtnetlink link ops */ - const struct rtnl_link_ops *rtnl_link_ops; - - /* VLAN feature mask */ - unsigned long vlan_features; - - /* for setting kernel sock attribute on TCP connection setup */ -#define GSO_MAX_SIZE 65536 - unsigned int gso_max_size; -}; -#define to_net_dev(d) container_of(d, struct net_device, dev) - -#define NETDEV_ALIGN 32 -#define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) - -static inline -struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, - unsigned int index) -{ - return &dev->_tx[index]; -} - -static inline void netdev_for_each_tx_queue(struct net_device *dev, - void (*f)(struct net_device *, - struct netdev_queue *, - void *), - void *arg) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) - f(dev, &dev->_tx[i], arg); -} - -/* - * Net namespace inlines - */ -static inline -struct net *dev_net(const struct net_device *dev) -{ -#ifdef CONFIG_NET_NS - return dev->nd_net; -#else - return &init_net; -#endif -} - -static inline -void dev_net_set(struct net_device *dev, struct net *net) -{ -#ifdef CONFIG_NET_NS - release_net(dev->nd_net); - dev->nd_net = hold_net(net); -#endif -} - -/** - * netdev_priv - access network device private data - * @dev: network device - * - * Get network device private data - */ -static inline void *netdev_priv(const struct net_device *dev) -{ - return (char *)dev + ((sizeof(struct net_device) - + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST); -} - -/* Set the sysfs physical device reference for the network logical device - * if set prior to registration will cause a symlink during initialization. 
- */ -#define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) - -/** - * netif_napi_add - initialize a napi context - * @dev: network device - * @napi: napi context - * @poll: polling function - * @weight: default weight - * - * netif_napi_add() must be used to initialize a napi context prior to calling - * *any* of the other napi related functions. - */ -static inline void netif_napi_add(struct net_device *dev, - struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) -{ - INIT_LIST_HEAD(&napi->poll_list); - napi->poll = poll; - napi->weight = weight; -#ifdef CONFIG_NETPOLL - napi->dev = dev; - list_add(&napi->dev_list, &dev->napi_list); - spin_lock_init(&napi->poll_lock); - napi->poll_owner = -1; -#endif - set_bit(NAPI_STATE_SCHED, &napi->state); -} - -/** - * netif_napi_del - remove a napi context - * @napi: napi context - * - * netif_napi_del() removes a napi context from the network device napi list - */ -static inline void netif_napi_del(struct napi_struct *napi) -{ -#ifdef CONFIG_NETPOLL - list_del(&napi->dev_list); -#endif -} - -struct packet_type { - __be16 type; /* This is really htons(ether_type). */ - struct net_device *dev; /* NULL is wildcarded here */ - unsigned char sknid_elevator; - int (*func) (struct sk_buff *, - struct net_device *, - struct packet_type *, - struct net_device *); - struct sk_buff *(*gso_segment)(struct sk_buff *skb, - int features); - int (*gso_send_check)(struct sk_buff *skb); - void *af_packet_priv; - struct list_head list; -}; - -#include -#include - -extern rwlock_t dev_base_lock; /* Device list lock */ - - -#define for_each_netdev(net, d) \ - list_for_each_entry(d, &(net)->dev_base_head, dev_list) -#define for_each_netdev_safe(net, d, n) \ - list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) -#define for_each_netdev_continue(net, d) \ - list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) -#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) - -static inline struct net_device *next_net_device(struct net_device *dev) -{ - struct list_head *lh; - struct net *net; - - net = dev_net(dev); - lh = dev->dev_list.next; - return lh == &net->dev_base_head ? NULL : net_device_entry(lh); -} - -static inline struct net_device *first_net_device(struct net *net) -{ - return list_empty(&net->dev_base_head) ? 
NULL : - net_device_entry(net->dev_base_head.next); -} - -extern int netdev_boot_setup_check(struct net_device *dev); -extern unsigned long netdev_boot_base(const char *prefix, int unit); -extern struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr); -extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); -extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); -extern void dev_add_pack(struct packet_type *pt); -extern void dev_remove_pack(struct packet_type *pt); -extern void __dev_remove_pack(struct packet_type *pt); - -extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags, - unsigned short mask); -extern struct net_device *dev_get_by_name(struct net *net, const char *name); -extern struct net_device *__dev_get_by_name(struct net *net, const char *name); -extern int dev_alloc_name(struct net_device *dev, const char *name); -extern int dev_open(struct net_device *dev); -extern int dev_close(struct net_device *dev); -extern void dev_disable_lro(struct net_device *dev); -extern int dev_queue_xmit(struct sk_buff *skb); -extern int register_netdevice(struct net_device *dev); -extern void unregister_netdevice(struct net_device *dev); -extern void free_netdev(struct net_device *dev); -extern void synchronize_net(void); -extern int register_netdevice_notifier(struct notifier_block *nb); -extern int unregister_netdevice_notifier(struct notifier_block *nb); -extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); -extern struct net_device *dev_get_by_index(struct net *net, int ifindex); -extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); -extern int dev_restart(struct net_device *dev); -#ifdef CONFIG_NETPOLL_TRAP -extern int netpoll_trap(void); -#endif - -static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, - unsigned len) -{ - if (!dev->header_ops || !dev->header_ops->create) - return 0; - - return dev->header_ops->create(skb, dev, type, daddr, saddr, len); -} - -static inline int dev_parse_header(const struct sk_buff *skb, - unsigned char *haddr) -{ - const struct net_device *dev = skb->dev; - - if (!dev->header_ops || !dev->header_ops->parse) - return 0; - return dev->header_ops->parse(skb, haddr); -} - -typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); -extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf); -static inline int unregister_gifconf(unsigned int family) -{ - return register_gifconf(family, NULL); -} - -/* - * Incoming packets are placed on per-cpu queues so that - * no locking is needed. 
- */ -struct softnet_data -{ - struct Qdisc *output_queue; - struct sk_buff_head input_pkt_queue; - struct list_head poll_list; - struct sk_buff *completion_queue; - - struct napi_struct backlog; -#ifdef CONFIG_NET_DMA - struct dma_chan *net_dma; -#endif -}; - -DECLARE_PER_CPU(struct softnet_data,softnet_data); - -#define HAVE_NETIF_QUEUE - -extern void __netif_schedule(struct Qdisc *q); - -static inline void netif_schedule_queue(struct netdev_queue *txq) -{ - if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); -} - -static inline void netif_tx_schedule_all(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) - netif_schedule_queue(netdev_get_tx_queue(dev, i)); -} - -static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) -{ - clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); -} - -/** - * netif_start_queue - allow transmit - * @dev: network device - * - * Allow upper layers to call the device hard_start_xmit routine. - */ -static inline void netif_start_queue(struct net_device *dev) -{ - netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); -} - -static inline void netif_tx_start_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_start_queue(txq); - } -} - -static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) -{ -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) { - clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); - return; - } -#endif - if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state)) - __netif_schedule(dev_queue->qdisc); -} - -/** - * netif_wake_queue - restart transmit - * @dev: network device - * - * Allow upper layers to call the device hard_start_xmit routine. - * Used for flow control when transmit resources are available. - */ -static inline void netif_wake_queue(struct net_device *dev) -{ - netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); -} - -static inline void netif_tx_wake_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_wake_queue(txq); - } -} - -static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) -{ - set_bit(__QUEUE_STATE_XOFF, &dev_queue->state); -} - -/** - * netif_stop_queue - stop transmitted packets - * @dev: network device - * - * Stop upper layers calling the device hard_start_xmit routine. - * Used for flow control when transmit resources are unavailable. - */ -static inline void netif_stop_queue(struct net_device *dev) -{ - netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); -} - -static inline void netif_tx_stop_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_stop_queue(txq); - } -} - -static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) -{ - return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state); -} - -/** - * netif_queue_stopped - test if transmit queue is flowblocked - * @dev: network device - * - * Test if transmit queue on device is currently unable to send. 
- */ -static inline int netif_queue_stopped(const struct net_device *dev) -{ - return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); -} - -static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue) -{ - return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state); -} - -/** - * netif_running - test if up - * @dev: network device - * - * Test if the device has been brought up. - */ -static inline int netif_running(const struct net_device *dev) -{ - return test_bit(__LINK_STATE_START, &dev->state); -} - -/* - * Routines to manage the subqueues on a device. We only need start - * stop, and a check if it's stopped. All other device management is - * done at the overall netdevice level. - * Also test the device if we're multiqueue. - */ - -/** - * netif_start_subqueue - allow sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Start individual transmit queue of a device with multiple transmit queues. - */ -static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - clear_bit(__QUEUE_STATE_XOFF, &txq->state); -} - -/** - * netif_stop_subqueue - stop sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Stop individual transmit queue of a device with multiple transmit queues. - */ -static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif - set_bit(__QUEUE_STATE_XOFF, &txq->state); -} - -/** - * netif_subqueue_stopped - test status of subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Check individual transmit queue of a device with multiple transmit queues. - */ -static inline int __netif_subqueue_stopped(const struct net_device *dev, - u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - return test_bit(__QUEUE_STATE_XOFF, &txq->state); -} - -static inline int netif_subqueue_stopped(const struct net_device *dev, - struct sk_buff *skb) -{ - return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); -} - -/** - * netif_wake_subqueue - allow sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Resume individual transmit queue of a device with multiple transmit queues. - */ -static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif - if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); -} - -/** - * netif_is_multiqueue - test if device has multiple transmit queues - * @dev: network device - * - * Check if device has multiple transmit queues - */ -static inline int netif_is_multiqueue(const struct net_device *dev) -{ - return (dev->num_tx_queues > 1); -} - -/* Use this variant when it is known for sure that it - * is executing from hardware interrupt context or with hardware interrupts - * disabled. - */ -extern void dev_kfree_skb_irq(struct sk_buff *skb); - -/* Use this variant in places where it could be invoked - * from either hardware interrupt or other context, with hardware interrupts - * either disabled or enabled. 
- */ -extern void dev_kfree_skb_any(struct sk_buff *skb); - -#define HAVE_NETIF_RX 1 -extern int netif_rx(struct sk_buff *skb); -extern int netif_rx_ni(struct sk_buff *skb); -#define HAVE_NETIF_RECEIVE_SKB 1 -extern int netif_receive_skb(struct sk_buff *skb); -extern void netif_nit_deliver(struct sk_buff *skb); -extern int dev_valid_name(const char *name); -extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); -extern int dev_ethtool(struct net *net, struct ifreq *); -extern unsigned dev_get_flags(const struct net_device *); -extern int dev_change_flags(struct net_device *, unsigned); -extern int dev_change_name(struct net_device *, char *); -extern int dev_change_net_namespace(struct net_device *, - struct net *, const char *); -extern int dev_set_mtu(struct net_device *, int); -extern int dev_set_mac_address(struct net_device *, - struct sockaddr *); -extern int dev_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev, - struct netdev_queue *txq); - -extern int netdev_budget; - -/* Called by rtnetlink.c:rtnl_unlock() */ -extern void netdev_run_todo(void); - -/** - * dev_put - release reference to device - * @dev: network device - * - * Release reference to device to allow it to be freed. - */ -static inline void dev_put(struct net_device *dev) -{ - atomic_dec(&dev->refcnt); -} - -/** - * dev_hold - get reference to device - * @dev: network device - * - * Hold reference to device to keep it from being freed. - */ -static inline void dev_hold(struct net_device *dev) -{ - atomic_inc(&dev->refcnt); -} - -/* Carrier loss detection, dial on demand. The functions netif_carrier_on - * and _off may be called from IRQ context, but it is caller - * who is responsible for serialization of these calls. - * - * The name carrier is inappropriate, these functions should really be - * called netif_lowerlayer_*() because they represent the state of any - * kind of lower layer not just hardware media. - */ - -extern void linkwatch_fire_event(struct net_device *dev); - -/** - * netif_carrier_ok - test if carrier present - * @dev: network device - * - * Check if carrier is present on device - */ -static inline int netif_carrier_ok(const struct net_device *dev) -{ - return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); -} - -extern void __netdev_watchdog_up(struct net_device *dev); - -extern void netif_carrier_on(struct net_device *dev); - -extern void netif_carrier_off(struct net_device *dev); - -/** - * netif_dormant_on - mark device as dormant. - * @dev: network device - * - * Mark device as dormant (as per RFC2863). - * - * The dormant state indicates that the relevant interface is not - * actually in a condition to pass packets (i.e., it is not 'up') but is - * in a "pending" state, waiting for some external event. For "on- - * demand" interfaces, this new state identifies the situation where the - * interface is waiting for events to place it in the up state. - * - */ -static inline void netif_dormant_on(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) - linkwatch_fire_event(dev); -} - -/** - * netif_dormant_off - set device as not dormant. - * @dev: network device - * - * Device is not in dormant state. 
- */ -static inline void netif_dormant_off(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) - linkwatch_fire_event(dev); -} - -/** - * netif_dormant - test if carrier present - * @dev: network device - * - * Check if carrier is present on device - */ -static inline int netif_dormant(const struct net_device *dev) -{ - return test_bit(__LINK_STATE_DORMANT, &dev->state); -} - - -/** - * netif_oper_up - test if device is operational - * @dev: network device - * - * Check if carrier is operational - */ -static inline int netif_oper_up(const struct net_device *dev) { - return (dev->operstate == IF_OPER_UP || - dev->operstate == IF_OPER_UNKNOWN /* backward compat */); -} - -/** - * netif_device_present - is device available or removed - * @dev: network device - * - * Check if device has not been removed from system. - */ -static inline int netif_device_present(struct net_device *dev) -{ - return test_bit(__LINK_STATE_PRESENT, &dev->state); -} - -extern void netif_device_detach(struct net_device *dev); - -extern void netif_device_attach(struct net_device *dev); - -/* - * Network interface message level settings - */ -#define HAVE_NETIF_MSG 1 - -enum { - NETIF_MSG_DRV = 0x0001, - NETIF_MSG_PROBE = 0x0002, - NETIF_MSG_LINK = 0x0004, - NETIF_MSG_TIMER = 0x0008, - NETIF_MSG_IFDOWN = 0x0010, - NETIF_MSG_IFUP = 0x0020, - NETIF_MSG_RX_ERR = 0x0040, - NETIF_MSG_TX_ERR = 0x0080, - NETIF_MSG_TX_QUEUED = 0x0100, - NETIF_MSG_INTR = 0x0200, - NETIF_MSG_TX_DONE = 0x0400, - NETIF_MSG_RX_STATUS = 0x0800, - NETIF_MSG_PKTDATA = 0x1000, - NETIF_MSG_HW = 0x2000, - NETIF_MSG_WOL = 0x4000, -}; - -#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) -#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) -#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) -#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) -#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) -#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) -#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) -#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) -#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) -#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) -#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) -#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) -#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) -#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) -#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) - -static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) -{ - /* use default */ - if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) - return default_msg_enable_bits; - if (debug_value == 0) /* no output */ - return 0; - /* set low N bits */ - return (1 << debug_value) - 1; -} - -/* Test if receive needs to be scheduled but only if up */ -static inline int netif_rx_schedule_prep(struct net_device *dev, - struct napi_struct *napi) -{ - return napi_schedule_prep(napi); -} - -/* Add interface to tail of rx poll list. This assumes that _prep has - * already been called and returned 1. - */ -static inline void __netif_rx_schedule(struct net_device *dev, - struct napi_struct *napi) -{ - __napi_schedule(napi); -} - -/* Try to reschedule poll. Called by irq handler. 
*/ - -static inline void netif_rx_schedule(struct net_device *dev, - struct napi_struct *napi) -{ - if (netif_rx_schedule_prep(dev, napi)) - __netif_rx_schedule(dev, napi); -} - -/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete(). */ -static inline int netif_rx_reschedule(struct net_device *dev, - struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __netif_rx_schedule(dev, napi); - return 1; - } - return 0; -} - -/* same as netif_rx_complete, except that local_irq_save(flags) - * has already been issued - */ -static inline void __netif_rx_complete(struct net_device *dev, - struct napi_struct *napi) -{ - __napi_complete(napi); -} - -/* Remove interface from poll list: it must be in the poll list - * on current cpu. This primitive is called by dev->poll(), when - * it completes the work. The device cannot be out of poll list at this - * moment, it is BUG(). - */ -static inline void netif_rx_complete(struct net_device *dev, - struct napi_struct *napi) -{ - unsigned long flags; - - local_irq_save(flags); - __netif_rx_complete(dev, napi); - local_irq_restore(flags); -} - -static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) -{ - spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; -} - -static inline void __netif_tx_lock_bh(struct netdev_queue *txq) -{ - spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); -} - -static inline int __netif_tx_trylock(struct netdev_queue *txq) -{ - int ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); - return ok; -} - -static inline void __netif_tx_unlock(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock(&txq->_xmit_lock); -} - -static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock_bh(&txq->_xmit_lock); -} - -/** - * netif_tx_lock - grab network device transmit lock - * @dev: network device - * @cpu: cpu number of lock owner - * - * Get network device transmit lock - */ -static inline void netif_tx_lock(struct net_device *dev) -{ - unsigned int i; - int cpu; - - spin_lock(&dev->tx_global_lock); - cpu = smp_processor_id(); - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - /* We are the only thread of execution doing a - * freeze, but we have to grab the _xmit_lock in - * order to synchronize with threads which are in - * the ->hard_start_xmit() handler and already - * checked the frozen bit. - */ - __netif_tx_lock(txq, cpu); - set_bit(__QUEUE_STATE_FROZEN, &txq->state); - __netif_tx_unlock(txq); - } -} - -static inline void netif_tx_lock_bh(struct net_device *dev) -{ - local_bh_disable(); - netif_tx_lock(dev); -} - -static inline void netif_tx_unlock(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - /* No need to grab the _xmit_lock here. If the - * queue is not stopped for another reason, we - * force a schedule. 
- */ - clear_bit(__QUEUE_STATE_FROZEN, &txq->state); - if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); - } - spin_unlock(&dev->tx_global_lock); -} - -static inline void netif_tx_unlock_bh(struct net_device *dev) -{ - netif_tx_unlock(dev); - local_bh_enable(); -} - -#define HARD_TX_LOCK(dev, txq, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - __netif_tx_lock(txq, cpu); \ - } \ -} - -#define HARD_TX_UNLOCK(dev, txq) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - __netif_tx_unlock(txq); \ - } \ -} - -static inline void netif_tx_disable(struct net_device *dev) -{ - unsigned int i; - int cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - __netif_tx_lock(txq, cpu); - netif_tx_stop_queue(txq); - __netif_tx_unlock(txq); - } - local_bh_enable(); -} - -static inline void netif_addr_lock(struct net_device *dev) -{ - spin_lock(&dev->addr_list_lock); -} - -static inline void netif_addr_lock_bh(struct net_device *dev) -{ - spin_lock_bh(&dev->addr_list_lock); -} - -static inline void netif_addr_unlock(struct net_device *dev) -{ - spin_unlock(&dev->addr_list_lock); -} - -static inline void netif_addr_unlock_bh(struct net_device *dev) -{ - spin_unlock_bh(&dev->addr_list_lock); -} - -/* These functions live elsewhere (drivers/net/net_init.c, but related) */ - -extern void ether_setup(struct net_device *dev); - -/* Support for loadable net-drivers */ -extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), - unsigned int queue_count); -#define alloc_netdev(sizeof_priv, name, setup) \ - alloc_netdev_mq(sizeof_priv, name, setup, 1) -extern int register_netdev(struct net_device *dev); -extern void unregister_netdev(struct net_device *dev); -/* Functions used for secondary unicast and multicast support */ -extern void dev_set_rx_mode(struct net_device *dev); -extern void __dev_set_rx_mode(struct net_device *dev); -extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); -extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); -extern int dev_unicast_sync(struct net_device *to, struct net_device *from); -extern void dev_unicast_unsync(struct net_device *to, struct net_device *from); -extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); -extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); -extern int dev_mc_sync(struct net_device *to, struct net_device *from); -extern void dev_mc_unsync(struct net_device *to, struct net_device *from); -extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); -extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); -extern int __dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count); -extern void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count); -extern int dev_set_promiscuity(struct net_device *dev, int inc); -extern int dev_set_allmulti(struct net_device *dev, int inc); -extern void netdev_state_change(struct net_device *dev); -extern void netdev_bonding_change(struct net_device *dev); -extern void netdev_features_change(struct net_device *dev); -/* Load a device via the kmod */ -extern void dev_load(struct net *net, const char *name); -extern void dev_mcast_init(void); 
-extern int netdev_max_backlog; -extern int weight_p; -extern int netdev_set_master(struct net_device *dev, struct net_device *master); -extern int skb_checksum_help(struct sk_buff *skb); -extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); -#ifdef CONFIG_BUG -extern void netdev_rx_csum_fault(struct net_device *dev); -#else -static inline void netdev_rx_csum_fault(struct net_device *dev) -{ -} -#endif -/* rx skb timestamps */ -extern void net_enable_timestamp(void); -extern void net_disable_timestamp(void); - -#ifdef CONFIG_PROC_FS -extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); -extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); -extern void dev_seq_stop(struct seq_file *seq, void *v); -#endif - -extern int netdev_class_create_file(struct class_attribute *class_attr); -extern void netdev_class_remove_file(struct class_attribute *class_attr); - -extern char *netdev_drivername(struct net_device *dev, char *buffer, int len); - -extern void linkwatch_run_queue(void); - -extern int netdev_compute_features(unsigned long all, unsigned long one); - -static inline int net_gso_ok(int features, int gso_type) -{ - int feature = gso_type << NETIF_F_GSO_SHIFT; - return (features & feature) == feature; -} - -static inline int skb_gso_ok(struct sk_buff *skb, int features) -{ - return net_gso_ok(features, skb_shinfo(skb)->gso_type); -} - -static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) -{ - return skb_is_gso(skb) && - (!skb_gso_ok(skb, dev->features) || - unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); -} - -static inline void netif_set_gso_max_size(struct net_device *dev, - unsigned int size) -{ - dev->gso_max_size = size; -} - -/* On bonding slaves other than the currently active slave, suppress - * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and - * ARP on active-backup slaves with arp_validate enabled. 
- */ -static inline int skb_bond_should_drop(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct net_device *master = dev->master; - - if (master && - (dev->priv_flags & IFF_SLAVE_INACTIVE)) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __constant_htons(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - return 0; - - return 1; - } - return 0; -} - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_DEV_H */ diff -Nurb linux-2.6.27-720/include/linux/netlink.h linux-2.6.27-710/include/linux/netlink.h --- linux-2.6.27-720/include/linux/netlink.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/netlink.h 2008-10-09 18:13:53.000000000 -0400 @@ -242,7 +242,7 @@ nlh->nlmsg_flags = flags; nlh->nlmsg_pid = pid; nlh->nlmsg_seq = seq; - memset((char*) NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } diff -Nurb linux-2.6.27-720/include/linux/page-flags.h linux-2.6.27-710/include/linux/page-flags.h --- linux-2.6.27-720/include/linux/page-flags.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/page-flags.h 2008-10-09 18:13:53.000000000 -0400 @@ -174,10 +174,8 @@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) -#ifndef __cplusplus PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) -#endif __PAGEFLAG(SlobPage, slob_page) __PAGEFLAG(SlobFree, slob_free) diff -Nurb linux-2.6.27-720/include/linux/prefetch.h linux-2.6.27-710/include/linux/prefetch.h --- linux-2.6.27-720/include/linux/prefetch.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/prefetch.h 2008-10-09 18:13:53.000000000 -0400 @@ -54,9 +54,9 @@ { #ifdef ARCH_HAS_PREFETCH char *cp; - char *end = (char*)(addr) + len; + char *end = addr + len; - for (cp = (char*)(addr); cp < end; cp += PREFETCH_STRIDE) + for (cp = addr; cp < end; cp += PREFETCH_STRIDE) prefetch(cp); #endif } diff -Nurb linux-2.6.27-720/include/linux/proc_fs.h linux-2.6.27-710/include/linux/proc_fs.h --- linux-2.6.27-720/include/linux/proc_fs.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/proc_fs.h 2009-05-04 12:15:13.000000000 -0400 @@ -315,7 +315,7 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde) { - return (struct net *) pde->parent->data; + return pde->parent->data; } struct proc_maps_private { diff -Nurb linux-2.6.27-720/include/linux/rculist.h linux-2.6.27-710/include/linux/rculist.h --- linux-2.6.27-720/include/linux/rculist.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/rculist.h 2008-10-09 18:13:53.000000000 -0400 @@ -94,7 +94,7 @@ static inline void list_del_rcu(struct list_head *entry) { __list_del(entry->prev, entry->next); - entry->prev = (struct list_head *) LIST_POISON2; + entry->prev = LIST_POISON2; } /** @@ -140,7 +140,7 @@ new->prev = old->prev; rcu_assign_pointer(new->prev->next, new); new->next->prev = new; - old->prev = (struct list_head *) LIST_POISON2; + old->prev = LIST_POISON2; } /** @@ -271,7 +271,7 @@ static inline void hlist_del_rcu(struct hlist_node *n) { __hlist_del(n); - n->pprev = (struct hlist_node **) LIST_POISON2; + n->pprev = 
LIST_POISON2; } /** @@ -291,7 +291,7 @@ rcu_assign_pointer(*new->pprev, new); if (next) new->next->pprev = &new->next; - old->pprev = (struct hlist_node **) LIST_POISON2; + old->pprev = LIST_POISON2; } /** diff -Nurb linux-2.6.27-720/include/linux/reiserfs_fs_sb.h linux-2.6.27-710/include/linux/reiserfs_fs_sb.h --- linux-2.6.27-720/include/linux/reiserfs_fs_sb.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/reiserfs_fs_sb.h 2009-05-04 12:15:13.000000000 -0400 @@ -337,7 +337,8 @@ } journal; } reiserfs_proc_info_data_t; #else -typedef EMPTY_STRUCT_DECL(reiserfs_proc_info_data) reiserfs_proc_info_data_t; +typedef struct reiserfs_proc_info_data { +} reiserfs_proc_info_data_t; #endif /* reiserfs union of in-core super block data */ diff -Nurb linux-2.6.27-720/include/linux/rtnetlink.h linux-2.6.27-710/include/linux/rtnetlink.h --- linux-2.6.27-720/include/linux/rtnetlink.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/rtnetlink.h 2008-10-09 18:13:53.000000000 -0400 @@ -727,7 +727,7 @@ rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); rta->rta_type = attrtype; rta->rta_len = size; - memset((char*) RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); + memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); return rta; } diff -Nurb linux-2.6.27-720/include/linux/scatterlist.h linux-2.6.27-710/include/linux/scatterlist.h --- linux-2.6.27-720/include/linux/scatterlist.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/scatterlist.h 2008-10-09 18:13:53.000000000 -0400 @@ -196,7 +196,7 @@ **/ static inline void *sg_virt(struct scatterlist *sg) { - return (char *) page_address(sg_page(sg)) + sg->offset; + return page_address(sg_page(sg)) + sg->offset; } struct scatterlist *sg_next(struct scatterlist *); diff -Nurb linux-2.6.27-720/include/linux/sched.h linux-2.6.27-710/include/linux/sched.h --- linux-2.6.27-720/include/linux/sched.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/sched.h 2009-05-04 12:15:13.000000000 -0400 @@ -242,12 +242,7 @@ extern void sched_init(void); extern void sched_init_smp(void); -#ifdef __cplusplus -asmlinkage void schedule_tail(struct task_struct *prev); -#else extern asmlinkage void schedule_tail(struct task_struct *prev); -#endif - extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); @@ -1982,7 +1977,7 @@ static inline unsigned long *end_of_stack(struct task_struct *p) { - return (unsigned long *)((char *) task_thread_info(p) + 1); + return (unsigned long *)(task_thread_info(p) + 1); } #endif @@ -1991,7 +1986,7 @@ { void *stack = task_stack_page(current); - return (obj >= stack) && ((char *) obj < ((char *) stack + THREAD_SIZE)); + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } extern void thread_info_cache_init(void); diff -Nurb linux-2.6.27-720/include/linux/sched.h.orig linux-2.6.27-710/include/linux/sched.h.orig --- linux-2.6.27-720/include/linux/sched.h.orig 2009-05-04 12:15:13.000000000 -0400 +++ linux-2.6.27-710/include/linux/sched.h.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,2244 +0,0 @@ -#ifndef _LINUX_SCHED_H -#define _LINUX_SCHED_H - -/* - * cloning flags: - */ -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#define CLONE_SIGHAND 0x00000800 
/* set if signal handlers and blocked signals shared */ -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#define CLONE_THREAD 0x00010000 /* Same thread group? */ -#define CLONE_NEWNS 0x00020000 /* New namespace group? */ -#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */ -#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */ -#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */ -#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */ -#define CLONE_DETACHED 0x00400000 /* Unused, ignored */ -#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ -#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ -#define CLONE_STOPPED 0x02000000 /* Start in stopped state */ -#define CLONE_NEWUTS 0x04000000 /* New utsname group? */ -#define CLONE_NEWIPC 0x08000000 /* New ipcs */ -#define CLONE_NEWUSER 0x10000000 /* New user namespace */ -#define CLONE_NEWPID 0x20000000 /* New pid namespace */ -#define CLONE_NEWNET 0x40000000 /* New network namespace */ -#define CLONE_IO 0x80000000 /* Clone io context */ - -/* - * Scheduling policies - */ -#define SCHED_NORMAL 0 -#define SCHED_FIFO 1 -#define SCHED_RR 2 -#define SCHED_BATCH 3 -/* SCHED_ISO: reserved but not implemented yet */ -#define SCHED_IDLE 5 - -#ifdef __KERNEL__ - -struct sched_param { - int sched_priority; -}; - -#include /* for HZ */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -struct mem_cgroup; -struct exec_domain; -struct futex_pi_state; -struct robust_list_head; -struct bio; - -/* - * List of flags we want to share for kernel threads, - * if only because they are not used by them anyway. - */ -#define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) - -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. 
- */ -extern unsigned long avenrun[]; /* Load averages */ - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<>= FSHIFT; - -extern unsigned long total_forks; -extern int nr_threads; -DECLARE_PER_CPU(unsigned long, process_counts); -extern int nr_processes(void); -extern unsigned long nr_running(void); -extern unsigned long nr_uninterruptible(void); -extern unsigned long nr_active(void); -extern unsigned long nr_iowait(void); - -struct seq_file; -struct cfs_rq; -struct task_group; -#ifdef CONFIG_SCHED_DEBUG -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); -extern void proc_sched_set_task(struct task_struct *p); -extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); -#else -static inline void -proc_sched_show_task(struct task_struct *p, struct seq_file *m) -{ -} -static inline void proc_sched_set_task(struct task_struct *p) -{ -} -static inline void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) -{ -} -#endif - -extern unsigned long long time_sync_thresh; - -/* - * Task state bitmask. NOTE! These bits are also - * encoded in fs/proc/array.c: get_task_state(). - * - * We have two separate sets of flags: task->state - * is about runnability, while task->exit_state are - * about the task exiting. Confusing, but this way - * modifying one set can't modify the other one by - * mistake. - */ -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 -/* in tsk->exit_state */ -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 -/* in tsk->state again */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 - -/* Convenience macros for the sake of set_task_state */ -#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) -#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) - -/* Convenience macros for the sake of wake_up */ -#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) - -/* get_task_state() */ -#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ - TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED) - -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) \ - ((task->state & TASK_UNINTERRUPTIBLE) != 0) - -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) - -/* - * set_current_state() includes a barrier so that the write of current->state - * is correctly serialised wrt the caller's subsequent test of whether to - * actually sleep: - * - * set_current_state(TASK_UNINTERRUPTIBLE); - * if (do_i_need_to_sleep()) - * schedule(); - * - * If the caller does not need such serialisation then use __set_current_state() - */ -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - set_mb(current->state, (state_value)) - -/* Task command name length */ -#define TASK_COMM_LEN 16 - -#include - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). 
- */ -extern rwlock_t tasklist_lock; -extern spinlock_t mmlist_lock; - -struct task_struct; - -extern void sched_init(void); -extern void sched_init_smp(void); -extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); - -extern int runqueue_is_locked(void); - -extern cpumask_t nohz_cpu_mask; -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) -extern int select_nohz_load_balancer(int cpu); -#else -static inline int select_nohz_load_balancer(int cpu) -{ - return 0; -} -#endif - -extern unsigned long rt_needs_cpu(int cpu); - -/* - * Only dump TASK_* tasks. (0 for all tasks) - */ -extern void show_state_filter(unsigned long state_filter); - -static inline void show_state(void) -{ - show_state_filter(0); -} - -extern void show_regs(struct pt_regs *); - -/* - * TASK is a pointer to the task whose backtrace we want to see (or NULL for current - * task), SP is the stack pointer of the first frame that should be shown in the back - * trace (or NULL if the entire call-chain of the task should be shown). - */ -extern void show_stack(struct task_struct *task, unsigned long *sp); - -void io_schedule(void); -long io_schedule_timeout(long timeout); - -extern void cpu_init (void); -extern void trap_init(void); -extern void account_process_tick(struct task_struct *task, int user); -extern void update_process_times(int user); -extern void scheduler_tick(void); -extern void hrtick_resched(void); - -extern void sched_show_task(struct task_struct *p); - -#ifdef CONFIG_DETECT_SOFTLOCKUP -extern void softlockup_tick(void); -extern void touch_softlockup_watchdog(void); -extern void touch_all_softlockup_watchdogs(void); -extern unsigned int softlockup_panic; -extern unsigned long sysctl_hung_task_check_count; -extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_warnings; -extern int softlockup_thresh; -#else -static inline void softlockup_tick(void) -{ -} -static inline void spawn_softlockup_task(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} -#endif - - -/* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) - -/* Linker adds these: start and end of __sched functions */ -extern char __sched_text_start[], __sched_text_end[]; - -/* Is this address in the __sched functions? */ -extern int in_sched_functions(unsigned long addr); - -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long schedule_timeout(signed long timeout); -extern signed long schedule_timeout_interruptible(signed long timeout); -extern signed long schedule_timeout_killable(signed long timeout); -extern signed long schedule_timeout_uninterruptible(signed long timeout); -asmlinkage void schedule(void); - -struct nsproxy; -struct user_namespace; - -/* Maximum number of active map areas.. 
This is a random (large) number */ -#define DEFAULT_MAX_MAP_COUNT 65536 - -extern int sysctl_max_map_count; - -#include - -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long -arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -extern void arch_unmap_area(struct mm_struct *, unsigned long); -extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); - -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS -/* - * The mm counters are not protected by its page_table_lock, - * so must be incremented atomically. - */ -#define __set_mm_counter(mm, member, value) \ - atomic_long_set(&(mm)->_##member, value) -#define get_mm_counter(mm, member) \ - ((unsigned long)atomic_long_read(&(mm)->_##member)) -#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ -/* - * The mm counters are protected by its page_table_lock, - * so can be incremented directly. - */ -#define __set_mm_counter(mm, member, value) (mm)->_##member = (value) -#define get_mm_counter(mm, member) ((mm)->_##member) - -#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ - -#define set_mm_counter(mm, member, value) \ - vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - value)) -#define add_mm_counter(mm, member, value) \ - vx_ ## member ## pages_add((mm), (value)) -#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm)) -#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm)) - -#define get_mm_rss(mm) \ - (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) -#define update_hiwater_rss(mm) do { \ - unsigned long _rss = get_mm_rss(mm); \ - if ((mm)->hiwater_rss < _rss) \ - (mm)->hiwater_rss = _rss; \ -} while (0) -#define update_hiwater_vm(mm) do { \ - if ((mm)->hiwater_vm < (mm)->total_vm) \ - (mm)->hiwater_vm = (mm)->total_vm; \ -} while (0) - -extern void set_dumpable(struct mm_struct *mm, int value); -extern int get_dumpable(struct mm_struct *mm); - -/* mm flags */ -/* dumpable bits */ -#define MMF_DUMPABLE 0 /* core dump is permitted */ -#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ -#define MMF_DUMPABLE_BITS 2 - -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 5 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) - -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - -struct pacct_struct { - int ac_flag; - long ac_exitcode; - unsigned long ac_mem; - cputime_t ac_utime, ac_stime; - unsigned long ac_minflt, ac_majflt; -}; - -/* - * NOTE! "signal_struct" does not have it's own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. 
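For context, not from this patch: the mm counter macros above are consumed by memory-management code roughly as follows (a sketch; the surrounding fault-handling code is omitted):

/* Charge one anonymous page to an mm and refresh its RSS high-water mark. */
static void account_one_anon_page(struct mm_struct *mm)
{
        inc_mm_counter(mm, anon_rss);   /* expands to the vx_*pages_inc() wrapper above */
        update_hiwater_rss(mm);         /* records the new maximum if RSS grew */
}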
- */ -struct signal_struct { - atomic_t count; - atomic_t live; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ - struct task_struct *group_exit_task; - int notify_count; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* POSIX.1b Interval Timers */ - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - struct pid *leader_pid; - ktime_t it_real_incr; - - /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ - cputime_t it_prof_expires, it_virt_expires; - cputime_t it_prof_incr, it_virt_incr; - - /* job control IDs */ - - /* - * pgrp and session fields are deprecated. - * use the task_session_Xnr and task_pgrp_Xnr routines below - */ - - union { - pid_t pgrp __deprecated; - pid_t __pgrp; - }; - - struct pid *tty_old_pgrp; - - union { - pid_t session __deprecated; - pid_t __session; - }; - - /* boolean value for session group leader */ - int leader; - - struct tty_struct *tty; /* NULL if no tty */ - - /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. - */ - cputime_t utime, stime, cutime, cstime; - cputime_t gtime; - cputime_t cgtime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - struct task_io_accounting ioac; - - /* - * Cumulative ns of scheduled CPU time for dead threads in the - * group, not including a zombie group leader. (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. 
- */ - struct rlimit rlim[RLIM_NLIMITS]; - - struct list_head cpu_timers[3]; - - /* keep the process-shared keyrings here so that they do the right - * thing in threads created with CLONE_THREAD */ -#ifdef CONFIG_KEYS - struct key *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ -#endif -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif -}; - -/* Context switch must be unlocked if interrupts are to be enabled */ -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW -# define __ARCH_WANT_UNLOCKED_CTXSW -#endif - -/* - * Bits in flags field of signal_struct. - */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ -#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t files; /* How many open files does this user have? */ - atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_INOTIFY_USER - atomic_t inotify_watches; /* How many inotify watches does this user have? */ - atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ -#endif -#ifdef CONFIG_EPOLL - atomic_t epoll_watches; /* The number of file descriptors currently watched */ -#endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -#endif - unsigned long locked_shm; /* How many pages of mlocked shm ? 
*/ - -#ifdef CONFIG_KEYS - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ -#endif - - /* Hash table maintenance information */ - struct hlist_node uidhash_node; - uid_t uid; - -#ifdef CONFIG_USER_SCHED - struct task_group *tg; -#ifdef CONFIG_SYSFS - struct kobject kobj; - struct work_struct work; -#endif -#endif -}; - -extern int uids_sysfs_init(void); - -extern struct user_struct *find_user(uid_t); - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - -struct backing_dev_info; -struct reclaim_state; - -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) -struct sched_info { - /* cumulative counters */ - unsigned long pcount; /* # of times run on this cpu */ - unsigned long long cpu_time, /* time spent on the cpu */ - run_delay; /* time spent waiting on a runqueue */ - - /* timestamps */ - unsigned long long last_arrival,/* when we last ran on a cpu */ - last_queued; /* when we were last queued to run */ -#ifdef CONFIG_SCHEDSTATS - /* BKL stats */ - unsigned int bkl_count; -#endif -}; -#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ - -#ifdef CONFIG_SCHEDSTATS -extern const struct file_operations proc_schedstat_operations; -#endif /* CONFIG_SCHEDSTATS */ - -#ifdef CONFIG_TASK_DELAY_ACCT -struct task_delay_info { - spinlock_t lock; - unsigned int flags; /* Private per-task flags */ - - /* For each stat XXX, add following, aligned appropriately - * - * struct timespec XXX_start, XXX_end; - * u64 XXX_delay; - * u32 XXX_count; - * - * Atomicity of updates to XXX_delay, XXX_count protected by - * single lock above (split into XXX_lock if contention is an issue). - */ - - /* - * XXX_count is incremented on every XXX operation, the delay - * associated with the operation is added to XXX_delay. - * XXX_delay contains the accumulated delay time in nanoseconds. - */ - struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ - u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ - u32 blkio_count; /* total count of the number of sync block */ - /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ - - struct timespec freepages_start, freepages_end; - u64 freepages_delay; /* wait for memory reclaim */ - u32 freepages_count; /* total count of memory reclaim */ -}; -#endif /* CONFIG_TASK_DELAY_ACCT */ - -static inline int sched_info_on(void) -{ -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif -} - -enum cpu_idle_type { - CPU_IDLE, - CPU_NOT_IDLE, - CPU_NEWLY_IDLE, - CPU_MAX_IDLE_TYPES -}; - -/* - * sched-domains (multiprocessor balancing) declarations: - */ - -/* - * Increase resolution of nice-level calculations: - */ -#define SCHED_LOAD_SHIFT 10 -#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) - -#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE - -#ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. 
*/ -#define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 4 /* Balance on exec */ -#define SD_BALANCE_FORK 8 /* Balance on fork, clone */ -#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */ -#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */ -#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */ -#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ -#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ -#define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ -#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ - -#define BALANCE_FOR_MC_POWER \ - (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) - -#define BALANCE_FOR_PKG_POWER \ - ((sched_mc_power_savings || sched_smt_power_savings) ? \ - SD_POWERSAVINGS_BALANCE : 0) - -#define test_sd_parent(sd, flag) ((sd->parent && \ - (sd->parent->flags & flag)) ? 1 : 0) - - -struct sched_group { - struct sched_group *next; /* Must be a circular list */ - cpumask_t cpumask; - - /* - * CPU power of this group, SCHED_LOAD_SCALE being max power for a - * single CPU. This is read only (except for setup, hotplug CPU). - * Note : Never change cpu_power without recompute its reciprocal - */ - unsigned int __cpu_power; - /* - * reciprocal value of cpu_power to avoid expensive divides - * (see include/linux/reciprocal_div.h) - */ - u32 reciprocal_cpu_power; -}; - -enum sched_domain_level { - SD_LV_NONE = 0, - SD_LV_SIBLING, - SD_LV_MC, - SD_LV_CPU, - SD_LV_NODE, - SD_LV_ALLNODES, - SD_LV_MAX -}; - -struct sched_domain_attr { - int relax_domain_level; -}; - -#define SD_ATTR_INIT (struct sched_domain_attr) { \ - .relax_domain_level = -1, \ -} - -struct sched_domain { - /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ - struct sched_group *groups; /* the balancing groups of the domain */ - cpumask_t span; /* span of all CPUs in this domain */ - unsigned long min_interval; /* Minimum balance interval ms */ - unsigned long max_interval; /* Maximum balance interval ms */ - unsigned int busy_factor; /* less balancing by factor if busy */ - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; - int flags; /* See SD_* */ - enum sched_domain_level level; - - /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ - unsigned int balance_interval; /* initialise to 1. units in ms. 
*/ - unsigned int nr_balance_failed; /* initialise to 0 */ - - u64 last_update; - -#ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ - unsigned int lb_count[CPU_MAX_IDLE_TYPES]; - unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; - - /* Active load balancing */ - unsigned int alb_count; - unsigned int alb_failed; - unsigned int alb_pushed; - - /* SD_BALANCE_EXEC stats */ - unsigned int sbe_count; - unsigned int sbe_balanced; - unsigned int sbe_pushed; - - /* SD_BALANCE_FORK stats */ - unsigned int sbf_count; - unsigned int sbf_balanced; - unsigned int sbf_pushed; - - /* try_to_wake_up() stats */ - unsigned int ttwu_wake_remote; - unsigned int ttwu_move_affine; - unsigned int ttwu_move_balance; -#endif -}; - -extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, - struct sched_domain_attr *dattr_new); -extern int arch_reinit_sched_domains(void); - -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_t *doms_new, - struct sched_domain_attr *dattr_new) -{ -} -#endif /* !CONFIG_SMP */ - -struct io_context; /* See blkdev.h */ -#define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -struct group_info { - int ngroups; - atomic_t usage; - gid_t small_block[NGROUPS_SMALL]; - int nblocks; - gid_t *blocks[0]; -}; - -/* - * get_group_info() must be called with the owning task locked (via task_lock()) - * when task != current. The reason being that the vast majority of callers are - * looking at current->group_info, which can not be changed except by the - * current task. Changing current->group_info requires the task lock, too. 
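For context, not from this patch: the locking rule above, together with the get_group_info()/put_group_info() and GROUP_AT() helpers defined next, is typically applied like this (a sketch; the in-tree groups_search() performs the equivalent lookup):

static int task_in_group(struct task_struct *p, gid_t grp)
{
        struct group_info *gi;
        int i, found = 0;

        task_lock(p);           /* required because p may not be current */
        gi = p->group_info;
        get_group_info(gi);     /* pin the group array */
        task_unlock(p);

        for (i = 0; i < gi->ngroups; i++) {
                if (GROUP_AT(gi, i) == grp) {
                        found = 1;
                        break;
                }
        }

        put_group_info(gi);
        return found;
}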
- */ -#define get_group_info(group_info) do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) - -#define put_group_info(group_info) do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ - groups_free(group_info); \ -} while (0) - -extern struct group_info *groups_alloc(int gidsetsize); -extern void groups_free(struct group_info *group_info); -extern int set_current_groups(struct group_info *group_info); -extern int groups_search(struct group_info *group_info, gid_t grp); -/* access the groups "array" with this macro */ -#define GROUP_AT(gi, i) \ - ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) - -#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct *t); -#else -static inline void prefetch_stack(struct task_struct *t) { } -#endif - -struct audit_context; /* See audit.c */ -struct mempolicy; -struct pipe_inode_info; -struct uts_namespace; - -struct rq; -struct sched_domain; - -struct sched_class { - const struct sched_class *next; - - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); - void (*yield_task) (struct rq *rq); - int (*select_task_rq)(struct task_struct *p, int sync); - - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); - - struct task_struct * (*pick_next_task) (struct rq *rq); - void (*put_prev_task) (struct rq *rq, struct task_struct *p); - -#ifdef CONFIG_SMP - unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, - struct rq *busiest, unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, int *this_best_prio); - - int (*move_one_task) (struct rq *this_rq, int this_cpu, - struct rq *busiest, struct sched_domain *sd, - enum cpu_idle_type idle); - void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); - void (*post_schedule) (struct rq *this_rq); - void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); -#endif - - void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_new) (struct rq *rq, struct task_struct *p); - void (*set_cpus_allowed)(struct task_struct *p, - const cpumask_t *newmask); - - void (*rq_online)(struct rq *rq); - void (*rq_offline)(struct rq *rq); - - void (*switched_from) (struct rq *this_rq, struct task_struct *task, - int running); - void (*switched_to) (struct rq *this_rq, struct task_struct *task, - int running); - void (*prio_changed) (struct rq *this_rq, struct task_struct *task, - int oldprio, int running); - -#ifdef CONFIG_FAIR_GROUP_SCHED - void (*moved_group) (struct task_struct *p); -#endif -}; - -struct load_weight { - unsigned long weight, inv_weight; -}; - -/* - * CFS stats for a schedulable entity (task, task-group etc) - * - * Current field usage histogram: - * - * 4 se->block_start - * 4 se->run_node - * 4 se->sleep_start - * 6 se->load.weight - */ -struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; - - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; - - u64 last_wakeup; - u64 avg_overlap; - -#ifdef CONFIG_SCHEDSTATS - u64 wait_start; - u64 wait_max; - u64 wait_count; - u64 wait_sum; - - u64 sleep_start; - u64 sleep_max; - s64 sum_sleep_runtime; - - u64 block_start; - u64 block_max; - u64 exec_max; - u64 slice_max; - - u64 nr_migrations; - u64 nr_migrations_cold; - u64 
nr_failed_migrations_affine; - u64 nr_failed_migrations_running; - u64 nr_failed_migrations_hot; - u64 nr_forced_migrations; - u64 nr_forced2_migrations; - - u64 nr_wakeups; - u64 nr_wakeups_sync; - u64 nr_wakeups_migrate; - u64 nr_wakeups_local; - u64 nr_wakeups_remote; - u64 nr_wakeups_affine; - u64 nr_wakeups_affine_attempts; - u64 nr_wakeups_passive; - u64 nr_wakeups_idle; -#endif - -#ifdef CONFIG_FAIR_GROUP_SCHED - struct sched_entity *parent; - /* rq on which this entity is (to be) queued: */ - struct cfs_rq *cfs_rq; - /* rq "owned" by this entity/group: */ - struct cfs_rq *my_q; -#endif -}; - -struct sched_rt_entity { - struct list_head run_list; - unsigned int time_slice; - unsigned long timeout; - int nr_cpus_allowed; - - struct sched_rt_entity *back; -#ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity *parent; - /* rq on which this entity is (to be) queued: */ - struct rt_rq *rt_rq; - /* rq "owned" by this entity/group: */ - struct rt_rq *my_q; -#endif -}; - -struct task_struct { - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; - atomic_t usage; - unsigned int flags; /* per process flags, defined below */ - unsigned int ptrace; - - int lock_depth; /* BKL lock depth */ - -#ifdef CONFIG_SMP -#ifdef __ARCH_WANT_UNLOCKED_CTXSW - int oncpu; -#endif -#endif - - int prio, static_prio, normal_prio; - unsigned int rt_priority; - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; - -#ifdef CONFIG_PREEMPT_NOTIFIERS - /* list of struct preempt_notifier: */ - struct hlist_head preempt_notifiers; -#endif - - /* - * fpu_counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time - */ - unsigned char fpu_counter; - s8 oomkilladj; /* OOM kill score adjustment (bit shift). */ -#ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; -#endif - - unsigned int policy; - cpumask_t cpus_allowed; - -#ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - int rcu_flipctr_idx; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) - struct sched_info sched_info; -#endif - - struct list_head tasks; - - struct mm_struct *mm, *active_mm; - -/* task state */ - struct linux_binfmt *binfmt; - int exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - /* ??? */ - unsigned int personality; - unsigned did_exec:1; - pid_t pid; - pid_t tgid; - -#ifdef CONFIG_CC_STACKPROTECTOR - /* Canary value for the -fstack-protector gcc feature */ - unsigned long stack_canary; -#endif - /* - * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. (p->father can be replaced with - * p->real_parent->pid) - */ - struct task_struct *real_parent; /* real parent process */ - struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ - /* - * children/sibling forms the list of my natural children - */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct task_struct *group_leader; /* threadgroup leader */ - - /* - * ptraced is the list of tasks this task is using ptrace on. 
- * This includes both natural children and PTRACE_ATTACH targets. - * p->ptrace_entry is p's link on the p->parent->ptraced list. - */ - struct list_head ptraced; - struct list_head ptrace_entry; - - /* PID/PID hash table linkage. */ - struct pid_link pids[PIDTYPE_MAX]; - struct list_head thread_group; - - struct completion *vfork_done; /* for vfork() */ - int __user *set_child_tid; /* CLONE_CHILD_SETTID */ - int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - - cputime_t utime, stime, utimescaled, stimescaled; - cputime_t gtime; - cputime_t prev_utime, prev_stime; - unsigned long nvcsw, nivcsw; /* context switch counts */ - struct timespec start_time; /* monotonic time */ - struct timespec real_start_time; /* boot based time */ -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt; - - cputime_t it_prof_expires, it_virt_expires; - unsigned long long it_sched_expires; - struct list_head cpu_timers[3]; - -/* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - struct user_struct *user; - unsigned securebits; -#ifdef CONFIG_KEYS - unsigned char jit_keyring; /* default keyring to attach requested keys to */ - struct key *request_key_auth; /* assumed request_key authority */ - struct key *thread_keyring; /* keyring private to this thread */ -#endif - char comm[TASK_COMM_LEN]; /* executable name excluding path - - access with [gs]et_task_comm (which lock - it with task_lock()) - - initialized normally by flush_old_exec */ -/* file system info */ - int link_count, total_link_count; -#ifdef CONFIG_SYSVIPC -/* ipc stuff */ - struct sysv_sem sysvsem; -#endif -#ifdef CONFIG_DETECT_SOFTLOCKUP -/* hung task detection */ - unsigned long last_switch_timestamp; - unsigned long last_switch_count; -#endif -/* CPU-specific state of this task */ - struct thread_struct thread; -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespaces */ - struct nsproxy *nsproxy; -/* signal handlers */ - struct signal_struct *signal; - struct sighand_struct *sighand; - - sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - int (*notifier)(void *priv); - void *notifier_data; - sigset_t *notifier_mask; -#ifdef CONFIG_SECURITY - void *security; -#endif - struct audit_context *audit_context; -#ifdef CONFIG_AUDITSYSCALL - uid_t loginuid; - unsigned int sessionid; -#endif - seccomp_t seccomp; - -/* vserver context data */ - struct vx_info *vx_info; - struct nx_info *nx_info; - - xid_t xid; - nid_t nid; - tag_t tag; - -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ - spinlock_t alloc_lock; - - /* Protection of the PI data structures: */ - spinlock_t pi_lock; - -#ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task */ - struct plist_head pi_waiters; - /* Deadlock detection and priority inheritance handling */ - struct rt_mutex_waiter *pi_blocked_on; -#endif - -#ifdef CONFIG_DEBUG_MUTEXES - /* mutex deadlock detection */ - struct mutex_waiter *blocked_on; -#endif -#ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; - int hardirqs_enabled; - unsigned long hardirq_enable_ip; - unsigned int 
hardirq_enable_event; - unsigned long hardirq_disable_ip; - unsigned int hardirq_disable_event; - int softirqs_enabled; - unsigned long softirq_disable_ip; - unsigned int softirq_disable_event; - unsigned long softirq_enable_ip; - unsigned int softirq_enable_event; - int hardirq_context; - int softirq_context; -#endif -#ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 48UL - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; -#endif - -/* journalling filesystem info */ - void *journal_info; - -/* stacked block device info */ - struct bio *bio_list, **bio_tail; - -/* VM state */ - struct reclaim_state *reclaim_state; - - struct backing_dev_info *backing_dev_info; - - struct io_context *io_context; - - unsigned long ptrace_message; - siginfo_t *last_siginfo; /* For ptrace use. */ - struct task_io_accounting ioac; -#if defined(CONFIG_TASK_XACCT) - u64 acct_rss_mem1; /* accumulated rss usage */ - u64 acct_vm_mem1; /* accumulated virtual memory usage */ - cputime_t acct_timexpd; /* stime + utime since last update */ -#endif -#ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; - int cpuset_mems_generation; - int cpuset_mem_spread_rotor; -#endif -#ifdef CONFIG_CGROUPS - /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; - /* cg_list protected by css_set_lock and tsk->alloc_lock */ - struct list_head cg_list; -#endif -#ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; -#ifdef CONFIG_COMPAT - struct compat_robust_list_head __user *compat_robust_list; -#endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; -#endif -#ifdef CONFIG_NUMA - struct mempolicy *mempolicy; - short il_next; -#endif - atomic_t fs_excl; /* holding fs exclusive resources */ - struct rcu_head rcu; - - struct list_head *scm_work_list; - -/* - * cache last used pipe for splice - */ - struct pipe_inode_info *splice_pipe; -#ifdef CONFIG_TASK_DELAY_ACCT - struct task_delay_info *delays; -#endif -#ifdef CONFIG_FAULT_INJECTION - int make_it_fail; -#endif - struct prop_local_single dirties; -#ifdef CONFIG_LATENCYTOP - int latency_record_count; - struct latency_record latency_record[LT_SAVECOUNT]; -#endif -}; - -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH - * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority - * values are inverted: lower p->prio value means higher priority. - * - * The MAX_USER_RT_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. 
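To make the ranges above concrete: with the definitions that follow, MAX_USER_RT_PRIO = MAX_RT_PRIO = 100 and MAX_PRIO = 140, so real-time tasks occupy p->prio 0..99 (lower value means higher priority) and SCHED_NORMAL/SCHED_BATCH tasks occupy 100..139. Nice values are mapped onto that second band elsewhere in the scheduler (roughly prio = MAX_RT_PRIO + 20 + nice), so nice 0 lands on DEFAULT_PRIO = 120, and rt_prio()/rt_task() below reduce to a simple prio < MAX_RT_PRIO test.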
- */ - -#define MAX_USER_RT_PRIO 100 -#define MAX_RT_PRIO MAX_USER_RT_PRIO - -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) - -static inline int rt_prio(int prio) -{ - if (unlikely(prio < MAX_RT_PRIO)) - return 1; - return 0; -} - -static inline int rt_task(struct task_struct *p) -{ - return rt_prio(p->prio); -} - -static inline void set_task_session(struct task_struct *tsk, pid_t session) -{ - tsk->signal->__session = session; -} - -static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp) -{ - tsk->signal->__pgrp = pgrp; -} - -static inline struct pid *task_pid(struct task_struct *task) -{ - return task->pids[PIDTYPE_PID].pid; -} - -static inline struct pid *task_tgid(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_PID].pid; -} - -static inline struct pid *task_pgrp(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_PGID].pid; -} - -static inline struct pid *task_session(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_SID].pid; -} - -struct pid_namespace; - -/* - * the helpers to get the task's different pids as they are seen - * from various namespaces - * - * task_xid_nr() : global id, i.e. the id seen from the init namespace; - * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of - * current. - * task_xid_nr_ns() : id seen from the ns specified; - * - * set_task_vxid() : assigns a virtual id to a task; - * - * see also pid_nr() etc in include/linux/pid.h - */ - -#include -#include -#include -#include - -static inline pid_t task_pid_nr(struct task_struct *tsk) -{ - return tsk->pid; -} - -pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_pid_vnr(struct task_struct *tsk) -{ - return vx_map_pid(pid_vnr(task_pid(tsk))); -} - - -static inline pid_t task_tgid_nr(struct task_struct *tsk) -{ - return tsk->tgid; -} - -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return vx_map_tgid(pid_vnr(task_tgid(tsk))); -} - - -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return tsk->signal->__pgrp; -} - -pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_pgrp_vnr(struct task_struct *tsk) -{ - return pid_vnr(task_pgrp(tsk)); -} - - -static inline pid_t task_session_nr(struct task_struct *tsk) -{ - return tsk->signal->__session; -} - -pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_session_vnr(struct task_struct *tsk) -{ - return pid_vnr(task_session(tsk)); -} - - -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - */ -static inline int pid_alive(struct task_struct *p) -{ - return p->pids[PIDTYPE_PID].pid != NULL; -} - -/** - * is_global_init - check if a task structure is init - * @tsk: Task structure to be checked. - * - * Check if a task structure is the first user space task the kernel created. - */ -static inline int is_global_init(struct task_struct *tsk) -{ - return tsk->pid == 1; -} - -/* - * is_container_init: - * check whether in the task is init in its own pid namespace. 
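For context, not from this patch: the _nr/_vnr distinction documented above matters whenever an id is reported to user space. A small sketch:

static void show_pids(struct task_struct *tsk)
{
        pid_t global = task_pid_nr(tsk);   /* id in the initial namespace */
        pid_t virt = task_pid_vnr(tsk);    /* id as seen from current's pid namespace,
                                              here also via the vserver pid map */

        printk(KERN_DEBUG "task %s: global pid %d, virtual pid %d\n",
               tsk->comm, global, virt);
}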
- */ -extern int is_container_init(struct task_struct *tsk); - -extern struct pid *cad_pid; - -extern void free_task(struct task_struct *tsk); -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) - -extern void __put_task_struct(struct task_struct *t); - -static inline void put_task_struct(struct task_struct *t) -{ - if (atomic_dec_and_test(&t->usage)) - __put_task_struct(t); -} - -extern cputime_t task_utime(struct task_struct *p); -extern cputime_t task_stime(struct task_struct *p); -extern cputime_t task_gtime(struct task_struct *p); - -/* - * Per process flags - */ -#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ -#define PF_STARTING 0x00000002 /* being created */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ -#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_SWAPOFF 0x00080000 /* I am in swapoff */ -#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ -#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ -#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ -#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ -#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ - -/* - * Only the _current_ task can read/write to tsk->flags, but other - * tasks can access tsk->flags in readonly mode for example - * with tsk_used_math (like during threaded core dumping). - * There is however an exception to this rule during ptrace - * or during fork: the ptracer task is allowed to write to the - * child->flags of its traced child (same goes for fork, the parent - * can write to the child->flags), because we're guaranteed the - * child is not running and in turn not changing child->flags - * at the same time the parent does it. - */ -#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) -#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) -#define clear_used_math() clear_stopped_child_used_math(current) -#define set_used_math() set_stopped_child_used_math(current) -#define conditional_stopped_child_used_math(condition, child) \ - do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? 
PF_USED_MATH : 0; } while (0) -#define conditional_used_math(condition) \ - conditional_stopped_child_used_math(condition, current) -#define copy_to_stopped_child_used_math(child) \ - do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) -/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ -#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) -#define used_math() tsk_used_math(current) - -#ifdef CONFIG_SMP -extern int set_cpus_allowed_ptr(struct task_struct *p, - const cpumask_t *new_mask); -#else -static inline int set_cpus_allowed_ptr(struct task_struct *p, - const cpumask_t *new_mask) -{ - if (!cpu_isset(0, *new_mask)) - return -EINVAL; - return 0; -} -#endif -static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) -{ - return set_cpus_allowed_ptr(p, &new_mask); -} - -extern unsigned long long sched_clock(void); - -extern void sched_clock_init(void); -extern u64 sched_clock_cpu(int cpu); - -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_tick(void) -{ -} - -static inline void sched_clock_idle_sleep_event(void) -{ -} - -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) -{ -} -#else -extern void sched_clock_tick(void); -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); -#endif - -/* - * For kernel-internal use: high-speed (but slightly incorrect) per-cpu - * clock constructed from sched_clock(): - */ -extern unsigned long long cpu_clock(int cpu); - -extern unsigned long long -task_sched_runtime(struct task_struct *task); - -/* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP -extern void sched_exec(void); -#else -#define sched_exec() {} -#endif - -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else -static inline void idle_task_exit(void) {} -#endif - -extern void sched_idle_next(void); - -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) -extern void wake_up_idle_cpu(int cpu); -#else -static inline void wake_up_idle_cpu(int cpu) { } -#endif - -#ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_latency; -extern unsigned int sysctl_sched_min_granularity; -extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_child_runs_first; -extern unsigned int sysctl_sched_features; -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_shares_ratelimit; - -int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, - loff_t *ppos); -#endif -extern unsigned int sysctl_sched_rt_period; -extern int sysctl_sched_rt_runtime; - -int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, - loff_t *ppos); - -extern unsigned int sysctl_sched_compat_yield; - -#ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern void rt_mutex_adjust_pi(struct task_struct *p); -#else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} -# define rt_mutex_adjust_pi(p) do { } while (0) -#endif - -extern void set_user_nice(struct task_struct *p, long nice); -extern int task_prio(const struct task_struct *p); -extern int task_nice(const struct 
task_struct *p); -extern int can_nice(const struct task_struct *p, const int nice); -extern int task_curr(const struct task_struct *p); -extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); -extern int sched_setscheduler_nocheck(struct task_struct *, int, - struct sched_param *); -extern struct task_struct *idle_task(int cpu); -extern struct task_struct *curr_task(int cpu); -extern void set_curr_task(int cpu, struct task_struct *p); - -void yield(void); - -/* - * The default (Linux) execution domain. - */ -extern struct exec_domain default_exec_domain; - -union thread_union { - struct thread_info thread_info; - unsigned long stack[THREAD_SIZE/sizeof(long)]; -}; - -#ifndef __HAVE_ARCH_KSTACK_END -static inline int kstack_end(void *addr) -{ - /* Reliable end of stack detection: - * Some APM bios versions misalign the stack - */ - return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); -} -#endif - -extern union thread_union init_thread_union; -extern struct task_struct init_task; - -extern struct mm_struct init_mm; - -extern struct pid_namespace init_pid_ns; - -/* - * find a task by one of its numerical ids - * - * find_task_by_pid_type_ns(): - * it is the most generic call - it finds a task by all id, - * type and namespace specified - * find_task_by_pid_ns(): - * finds a task by its pid in the specified namespace - * find_task_by_vpid(): - * finds a task by its virtual pid - * - * see also find_vpid() etc in include/linux/pid.h - */ - -extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, - struct pid_namespace *ns); - -extern struct task_struct *find_task_by_vpid(pid_t nr); -extern struct task_struct *find_task_by_pid_ns(pid_t nr, - struct pid_namespace *ns); - -extern void __set_special_pids(struct pid *pid); - -/* per-UID process charging. 
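For context, not from this patch: the find_task_by_*() lookups above return a task without taking a reference, so a typical caller pins the result before leaving the RCU read-side section (a sketch; error handling trimmed):

static struct task_struct *grab_task(pid_t nr)
{
        struct task_struct *tsk;

        rcu_read_lock();
        tsk = find_task_by_vpid(nr);    /* virtual pid, i.e. what the caller sees */
        if (tsk)
                get_task_struct(tsk);   /* keep it valid past rcu_read_unlock() */
        rcu_read_unlock();

        return tsk;                     /* caller must put_task_struct() when done */
}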
*/ -extern struct user_struct * alloc_uid(struct user_namespace *, uid_t); -static inline struct user_struct *get_uid(struct user_struct *u) -{ - atomic_inc(&u->__count); - return u; -} -extern void free_uid(struct user_struct *); -extern void switch_uid(struct user_struct *); -extern void release_uids(struct user_namespace *ns); - -#include - -extern void do_timer(unsigned long ticks); - -extern int wake_up_state(struct task_struct *tsk, unsigned int state); -extern int wake_up_process(struct task_struct *tsk); -extern void wake_up_new_task(struct task_struct *tsk, - unsigned long clone_flags); -#ifdef CONFIG_SMP - extern void kick_process(struct task_struct *tsk); -#else - static inline void kick_process(struct task_struct *tsk) { } -#endif -extern void sched_fork(struct task_struct *p, int clone_flags); -extern void sched_dead(struct task_struct *p); - -extern int in_group_p(gid_t); -extern int in_egroup_p(gid_t); - -extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&tsk->sighand->siglock, flags); - ret = dequeue_signal(tsk, mask, info); - spin_unlock_irqrestore(&tsk->sighand->siglock, flags); - - return ret; -} - -extern void block_all_signals(int (*notifier)(void *priv), void *priv, - sigset_t *mask); -extern void unblock_all_signals(void); -extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_t, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern int do_notify_parent(struct task_struct *, int); -extern void force_sig(int, struct task_struct *); -extern void force_sig_specific(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern void zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); -extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); - -static inline int kill_cad_pid(int sig, int priv) -{ - return kill_pid(cad_pid, sig, priv); -} - -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) - -static inline int is_si_special(const struct siginfo *info) -{ - return info <= SEND_SIG_FORCED; -} - -/* True if we are on the alternate signal stack. 
*/ - -static inline int on_sig_stack(unsigned long sp) -{ - return (sp - current->sas_ss_sp < current->sas_ss_size); -} - -static inline int sas_ss_flags(unsigned long sp) -{ - return (current->sas_ss_size == 0 ? SS_DISABLE - : on_sig_stack(sp) ? SS_ONSTACK : 0); -} - -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct * mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); -/* Allocate a new mm structure and copy contents from tsk->mm */ -extern struct mm_struct *dup_mm(struct task_struct *tsk); - -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); -extern void flush_thread(void); -extern void exit_thread(void); - -extern void exit_files(struct task_struct *); -extern void __cleanup_signal(struct signal_struct *); -extern void __cleanup_sighand(struct sighand_struct *); - -extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); - -extern NORET_TYPE void do_group_exit(int); - -extern void daemonize(const char *, ...); -extern int allow_signal(int); -extern int disallow_signal(int); - -extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); -extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); -struct task_struct *fork_idle(int); - -extern void set_task_comm(struct task_struct *tsk, char *from); -extern char *get_task_comm(char *to, struct task_struct *tsk); - -#ifdef CONFIG_SMP -extern unsigned long wait_task_inactive(struct task_struct *, long match_state); -#else -static inline unsigned long wait_task_inactive(struct task_struct *p, - long match_state) -{ - return 1; -} -#endif - -#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -/* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. - */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -/* de_thread depends on thread_group_leader not being a pid based check */ -#define thread_group_leader(p) (p == p->group_leader) - -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. 
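For context, not from this patch: because do_each_thread()/while_each_thread() expand to a double loop, leaving it early needs a goto, exactly as the comment above warns. A sketch:

static struct task_struct *find_thread(pid_t nr)
{
        struct task_struct *g, *t;

        read_lock(&tasklist_lock);
        do_each_thread(g, t) {
                if (task_pid_nr(t) == nr)
                        goto found;     /* 'break' would only leave the inner loop */
        } while_each_thread(g, t);
        t = NULL;
found:
        read_unlock(&tasklist_lock);
        return t;
}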
- */ -static inline int has_group_leader_pid(struct task_struct *p) -{ - return p->pid == p->tgid; -} - -static inline -int same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->tgid == p2->tgid; -} - -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry(rcu_dereference(p->thread_group.next), - struct task_struct, thread_group); -} - -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} - -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) - -/* - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. - */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - -extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); -} - -#ifndef __HAVE_THREAD_FUNCTIONS - -#define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((task)->stack) - -static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) -{ - *task_thread_info(p) = *task_thread_info(org); - task_thread_info(p)->task = p; -} - -static inline unsigned long *end_of_stack(struct task_struct *p) -{ - return (unsigned long *)(task_thread_info(p) + 1); -} - -#endif - -static inline int object_is_on_stack(void *obj) -{ - void *stack = task_stack_page(current); - - return (obj >= stack) && (obj < (stack + THREAD_SIZE)); -} - -extern void thread_info_cache_init(void); - -/* set thread flags in other task's structures - * - see asm/thread_info.h for TIF_xxxx flags available - */ -static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) -{ - set_ti_thread_flag(task_thread_info(tsk), flag); -} - -static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) -{ - clear_ti_thread_flag(task_thread_info(tsk), flag); -} - -static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) -{ - return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); -} - -static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) -{ - return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); -} - -static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) -{ - return test_ti_thread_flag(task_thread_info(tsk), flag); -} - -static inline void set_tsk_need_resched(struct task_struct *tsk) -{ - set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); -} - -static inline void clear_tsk_need_resched(struct task_struct *tsk) -{ - clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); -} - -static inline int test_tsk_need_resched(struct task_struct *tsk) -{ - return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); -} - -static inline int signal_pending(struct task_struct *p) -{ - return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -} - -extern int __fatal_signal_pending(struct task_struct *p); - -static inline int 
fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && __fatal_signal_pending(p); -} - -static inline int signal_pending_state(long state, struct task_struct *p) -{ - if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) - return 0; - if (!signal_pending(p)) - return 0; - - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); -} - -static inline int need_resched(void) -{ - return unlikely(test_thread_flag(TIF_NEED_RESCHED)); -} - -/* - * cond_resched() and cond_resched_lock(): latency reduction via - * explicit rescheduling in places that are safe. The return - * value indicates whether a reschedule was done in fact. - * cond_resched_lock() will drop the spinlock before scheduling, - * cond_resched_softirq() will enable bhs before scheduling. - */ -extern int _cond_resched(void); -#ifdef CONFIG_PREEMPT_BKL -static inline int cond_resched(void) -{ - return 0; -} -#else -static inline int cond_resched(void) -{ - return _cond_resched(); -} -#endif -extern int cond_resched_lock(spinlock_t * lock); -extern int cond_resched_softirq(void); -static inline int cond_resched_bkl(void) -{ - return _cond_resched(); -} - -/* - * Does a critical section need to be broken due to another - * task waiting?: (technically does not depend on CONFIG_PREEMPT, - * but a general need for low latency) - */ -static inline int spin_needbreak(spinlock_t *lock) -{ -#ifdef CONFIG_PREEMPT - return spin_is_contended(lock); -#else - return 0; -#endif -} - -/* - * Reevaluate whether the task has signals pending delivery. - * Wake the task if so. - * This is required every time the blocked sigset_t changes. - * callers must hold sighand->siglock. - */ -extern void recalc_sigpending_and_wake(struct task_struct *t); -extern void recalc_sigpending(void); - -extern void signal_wake_up(struct task_struct *t, int resume_stopped); - -/* - * Wrappers for p->thread_info->cpu access. No-op on UP. 
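For context, not from this patch: cond_resched() as described above is usually dropped into long kernel-side loops; scrub_one_item() here is a hypothetical unit of work:

static void scrub_items(unsigned long nr)
{
        unsigned long i;

        for (i = 0; i < nr; i++) {
                scrub_one_item(i);      /* hypothetical per-item work */
                cond_resched();         /* yield if a higher-priority task is waiting */
        }
}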
- */ -#ifdef CONFIG_SMP - -static inline unsigned int task_cpu(const struct task_struct *p) -{ - return task_thread_info(p)->cpu; -} - -extern void set_task_cpu(struct task_struct *p, unsigned int cpu); - -#else - -static inline unsigned int task_cpu(const struct task_struct *p) -{ - return 0; -} - -static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) -{ -} - -#endif /* CONFIG_SMP */ - -extern void arch_pick_mmap_layout(struct mm_struct *mm); - -#ifdef CONFIG_TRACING -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - -extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -extern long sched_getaffinity(pid_t pid, cpumask_t *mask); - -extern int sched_mc_power_savings, sched_smt_power_savings; - -extern void normalize_rt_tasks(void); - -#ifdef CONFIG_GROUP_SCHED - -extern struct task_group init_task_group; -#ifdef CONFIG_USER_SCHED -extern struct task_group root_task_group; -#endif - -extern struct task_group *sched_create_group(struct task_group *parent); -extern void sched_destroy_group(struct task_group *tg); -extern void sched_move_task(struct task_struct *tsk); -#ifdef CONFIG_FAIR_GROUP_SCHED -extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); -extern unsigned long sched_group_shares(struct task_group *tg); -#endif -#ifdef CONFIG_RT_GROUP_SCHED -extern int sched_group_set_rt_runtime(struct task_group *tg, - long rt_runtime_us); -extern long sched_group_rt_runtime(struct task_group *tg); -extern int sched_group_set_rt_period(struct task_group *tg, - long rt_period_us); -extern long sched_group_rt_period(struct task_group *tg); -#endif -#endif - -#ifdef CONFIG_TASK_XACCT -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.rchar += amt; -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.wchar += amt; -} - -static inline void inc_syscr(struct task_struct *tsk) -{ - tsk->ioac.syscr++; -} - -static inline void inc_syscw(struct task_struct *tsk) -{ - tsk->ioac.syscw++; -} -#else -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void inc_syscr(struct task_struct *tsk) -{ -} - -static inline void inc_syscw(struct task_struct *tsk) -{ -} -#endif - -#ifndef TASK_SIZE_OF -#define TASK_SIZE_OF(tsk) TASK_SIZE -#endif - -#ifdef CONFIG_MM_OWNER -extern void mm_update_next_owner(struct mm_struct *mm); -extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); -#else -static inline void mm_update_next_owner(struct mm_struct *mm) -{ -} - -static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) -{ -} -#endif /* CONFIG_MM_OWNER */ - -#define TASK_STATE_TO_CHAR_STR "RSDTtZX" - -#endif /* __KERNEL__ */ - -#endif diff -Nurb linux-2.6.27-720/include/linux/seccomp.h linux-2.6.27-710/include/linux/seccomp.h --- linux-2.6.27-720/include/linux/seccomp.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/seccomp.h 2008-10-09 18:13:53.000000000 -0400 @@ -21,7 +21,7 @@ #else /* CONFIG_SECCOMP */ -typedef EMPTY_STRUCT_DECL(/* unnamed */) seccomp_t; +typedef struct { } seccomp_t; #define secure_computing(x) do { } while (0) diff -Nurb linux-2.6.27-720/include/linux/security.h 
linux-2.6.27-710/include/linux/security.h --- linux-2.6.27-720/include/linux/security.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/security.h 2008-10-09 18:13:53.000000000 -0400 @@ -2427,7 +2427,7 @@ static inline struct dentry *securityfs_create_dir(const char *name, struct dentry *parent) { - return (struct dentry *) ERR_PTR(-ENODEV); + return ERR_PTR(-ENODEV); } static inline struct dentry *securityfs_create_file(const char *name, @@ -2436,7 +2436,7 @@ void *data, const struct file_operations *fops) { - return (struct dentry *) ERR_PTR(-ENODEV); + return ERR_PTR(-ENODEV); } static inline void securityfs_remove(struct dentry *dentry) diff -Nurb linux-2.6.27-720/include/linux/semaphore.h linux-2.6.27-710/include/linux/semaphore.h --- linux-2.6.27-720/include/linux/semaphore.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/semaphore.h 2008-10-09 18:13:53.000000000 -0400 @@ -19,21 +19,12 @@ struct list_head wait_list; }; -#ifdef __cplusplus -#define __SEMAPHORE_INITIALIZER(name, n) \ -({ struct semaphore duh; \ - duh.lock = __SPIN_LOCK_UNLOCKED((name).lock), \ - duh.count = n, \ - duh.wait_list = LIST_HEAD_INIT((name).wait_list), \ - duh;}) -#else #define __SEMAPHORE_INITIALIZER(name, n) \ { \ .lock = __SPIN_LOCK_UNLOCKED((name).lock), \ .count = n, \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ } -#endif #define DECLARE_MUTEX(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) diff -Nurb linux-2.6.27-720/include/linux/skbuff.h linux-2.6.27-710/include/linux/skbuff.h --- linux-2.6.27-720/include/linux/skbuff.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/skbuff.h 2009-05-04 12:15:31.000000000 -0400 @@ -194,12 +194,6 @@ typedef unsigned char *sk_buff_data_t; #endif -/* Click: overload sk_buff.pkt_type to contain information about whether - a packet is clean. Clean packets have the following fields zero: - dst, destructor, pkt_bridged, prev, list, sk, security, priority. */ -#define PACKET_CLEAN 128 /* Is packet clean? */ -#define PACKET_TYPE_MASK 127 /* Actual packet type */ - /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -383,7 +377,6 @@ gfp_t priority); extern struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask); -extern struct sk_buff *skb_recycle(struct sk_buff *skb); extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); @@ -1333,7 +1326,7 @@ } static inline int skb_add_data(struct sk_buff *skb, - unsigned char __user *from, int copy) + char __user *from, int copy) { const int off = skb->len; @@ -1409,7 +1402,7 @@ const void *start, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_sub(skb->csum, csum_partial((const unsigned char *) start, len, 0)); + skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); } unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); diff -Nurb linux-2.6.27-720/include/linux/skbuff.h.orig linux-2.6.27-710/include/linux/skbuff.h.orig --- linux-2.6.27-720/include/linux/skbuff.h.orig 2009-05-04 12:15:31.000000000 -0400 +++ linux-2.6.27-710/include/linux/skbuff.h.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,1730 +0,0 @@ -/* - * Definitions for the 'struct sk_buff' memory handlers. 
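In the securityfs stubs shown in the include/linux/security.h hunk above, the !CONFIG_SECURITYFS fallbacks return ERR_PTR(-ENODEV); the -720 side adds an explicit (struct dentry *) cast, presumably because ERR_PTR() returns void * and C++ does not perform C's implicit void-pointer conversion. Callers treat the result the same way either way; the function and variable names in the sketch below are illustrative, not taken from the patch.

#include <linux/err.h>
#include <linux/security.h>

/* Hypothetical caller checking the stub's error pointer. */
static struct dentry *example_dir;

static int example_securityfs_setup(void)
{
	example_dir = securityfs_create_dir("example", NULL);
	if (IS_ERR(example_dir))		/* stub yields ERR_PTR(-ENODEV) */
		return PTR_ERR(example_dir);
	return 0;
}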
- * - * Authors: - * Alan Cox, - * Florian La Roche, - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_SKBUFF_H -#define _LINUX_SKBUFF_H - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define HAVE_ALLOC_SKB /* For the drivers to know */ -#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ - -/* Don't change this without changing skb_csum_unnecessary! */ -#define CHECKSUM_NONE 0 -#define CHECKSUM_UNNECESSARY 1 -#define CHECKSUM_COMPLETE 2 -#define CHECKSUM_PARTIAL 3 - -#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ - ~(SMP_CACHE_BYTES - 1)) -#define SKB_WITH_OVERHEAD(X) \ - ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -#define SKB_MAX_ORDER(X, ORDER) \ - SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X)) -#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) -#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) - -/* A. Checksumming of received packets by device. - * - * NONE: device failed to checksum this packet. - * skb->csum is undefined. - * - * UNNECESSARY: device parsed packet and wouldbe verified checksum. - * skb->csum is undefined. - * It is bad option, but, unfortunately, many of vendors do this. - * Apparently with secret goal to sell you new device, when you - * will add new protocol to your host. F.e. IPv6. 8) - * - * COMPLETE: the most generic way. Device supplied checksum of _all_ - * the packet as seen by netif_rx in skb->csum. - * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use COMPLETE, - * not UNNECESSARY. - * - * PARTIAL: identical to the case for output below. This may occur - * on a packet received directly from another Linux OS, e.g., - * a virtualised Linux kernel on the same host. The packet can - * be treated in the same way as UNNECESSARY except that on - * output (i.e., forwarding) the checksum must be filled in - * by the OS or the hardware. - * - * B. Checksumming on output. - * - * NONE: skb is checksummed by protocol or csum is not required. - * - * PARTIAL: device is required to csum packet as seen by hard_start_xmit - * from skb->csum_start to the end and to record the checksum - * at skb->csum_start + skb->csum_offset. - * - * Device must show its capabilities in dev->features, set - * at device setup time. - * NETIF_F_HW_CSUM - it is clever device, it is able to checksum - * everything. - * NETIF_F_NO_CSUM - loopback or reliable single hop media. - * NETIF_F_IP_CSUM - device is dumb. It is able to csum only - * TCP/UDP over IPv4. Sigh. Vendors like this - * way by an unknown reason. Though, see comment above - * about CHECKSUM_UNNECESSARY. 8) - * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. - * - * Any questions? No questions, good. --ANK - */ - -struct net_device; -struct scatterlist; -struct pipe_inode_info; - -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -struct nf_conntrack { - atomic_t use; -}; -#endif - -#ifdef CONFIG_BRIDGE_NETFILTER -struct nf_bridge_info { - atomic_t use; - struct net_device *physindev; - struct net_device *physoutdev; - unsigned int mask; - unsigned long data[32 / sizeof(unsigned long)]; -}; -#endif - -struct sk_buff_head { - /* These two members must be first. 
*/ - struct sk_buff *next; - struct sk_buff *prev; - - __u32 qlen; - spinlock_t lock; -}; - -struct sk_buff; - -/* To allow 64K frame to be packed as single skb without frag_list */ -#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) - -typedef struct skb_frag_struct skb_frag_t; - -struct skb_frag_struct { - struct page *page; - __u32 page_offset; - __u32 size; -}; - -/* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -struct skb_shared_info { - atomic_t dataref; - unsigned short nr_frags; - unsigned short gso_size; - /* Warning: this field is not always filled in (UFO)! */ - unsigned short gso_segs; - unsigned short gso_type; - __be32 ip6_frag_id; - struct sk_buff *frag_list; - skb_frag_t frags[MAX_SKB_FRAGS]; -}; - -/* We divide dataref into two halves. The higher 16 bits hold references - * to the payload part of skb->data. The lower 16 bits hold references to - * the entire skb->data. A clone of a headerless skb holds the length of - * the header in skb->hdr_len. - * - * All users must obey the rule that the skb->data reference count must be - * greater than or equal to the payload reference count. - * - * Holding a reference to the payload part means that the user does not - * care about modifications to the header part of skb->data. - */ -#define SKB_DATAREF_SHIFT 16 -#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) - - -enum { - SKB_FCLONE_UNAVAILABLE, - SKB_FCLONE_ORIG, - SKB_FCLONE_CLONE, -}; - -enum { - SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDP = 1 << 1, - - /* This indicates the skb is from an untrusted source. */ - SKB_GSO_DODGY = 1 << 2, - - /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCP_ECN = 1 << 3, - - SKB_GSO_TCPV6 = 1 << 4, -}; - -#if BITS_PER_LONG > 32 -#define NET_SKBUFF_DATA_USES_OFFSET 1 -#endif - -#ifdef NET_SKBUFF_DATA_USES_OFFSET -typedef unsigned int sk_buff_data_t; -#else -typedef unsigned char *sk_buff_data_t; -#endif - -/** - * struct sk_buff - socket buffer - * @next: Next buffer in list - * @prev: Previous buffer in list - * @sk: Socket we are owned by - * @tstamp: Time we arrived - * @dev: Device we arrived on/are leaving by - * @transport_header: Transport layer header - * @network_header: Network layer header - * @mac_header: Link layer header - * @dst: destination entry - * @sp: the security path, used for xfrm - * @cb: Control buffer. Free for use by every layer. 
Put private vars here - * @len: Length of actual data - * @data_len: Data length - * @mac_len: Length of link layer header - * @hdr_len: writable header length of cloned skb - * @csum: Checksum (must include start/offset pair) - * @csum_start: Offset from skb->head where checksumming should start - * @csum_offset: Offset from csum_start where checksum should be stored - * @local_df: allow local fragmentation - * @cloned: Head may be cloned (check refcnt to be sure) - * @nohdr: Payload reference only, must not modify header - * @pkt_type: Packet class - * @fclone: skbuff clone status - * @ip_summed: Driver fed us an IP checksum - * @priority: Packet queueing priority - * @users: User count - see {datagram,tcp}.c - * @protocol: Packet protocol from driver - * @truesize: Buffer size - * @head: Head of buffer - * @data: Data head pointer - * @tail: Tail pointer - * @end: End pointer - * @destructor: Destruct function - * @mark: Generic packet mark - * @nfct: Associated connection, if any - * @ipvs_property: skbuff is owned by ipvs - * @peeked: this packet has been seen already, so stats have been - * done for it, don't do them again - * @nf_trace: netfilter packet trace flag - * @nfctinfo: Relationship of this skb to the connection - * @nfct_reasm: netfilter conntrack re-assembly pointer - * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c - * @iif: ifindex of device we arrived on - * @queue_mapping: Queue mapping for multiqueue devices - * @tc_index: Traffic control index - * @tc_verd: traffic control verdict - * @ndisc_nodetype: router type (from link layer) - * @do_not_encrypt: set to prevent encryption of this frame - * @dma_cookie: a cookie to one of several possible DMA operations - * done by skb DMA functions - * @secmark: security marking - * @vlan_tci: vlan tag control information - */ - -struct sk_buff { - /* These two members must be first. */ - struct sk_buff *next; - struct sk_buff *prev; - - struct sock *sk; - ktime_t tstamp; - struct net_device *dev; - - union { - struct dst_entry *dst; - struct rtable *rtable; - }; - struct sec_path *sp; - - /* - * This is the control buffer. It is free to use for every - * layer. Please put your private variables there. If you - * want to keep them across layers you have to do a skb_clone() - * first. This is owned by whoever has the skb queued ATM. 
- */ - char cb[48]; - - unsigned int len, - data_len; - __u16 mac_len, - hdr_len; - union { - __wsum csum; - struct { - __u16 csum_start; - __u16 csum_offset; - }; - }; - __u32 priority; - __u8 local_df:1, - cloned:1, - ip_summed:2, - nohdr:1, - nfctinfo:3; - __u8 pkt_type:3, - fclone:2, - ipvs_property:1, - peeked:1, - nf_trace:1; - __be16 protocol; - - void (*destructor)(struct sk_buff *skb); -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; - struct sk_buff *nfct_reasm; -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - struct nf_bridge_info *nf_bridge; -#endif - - int iif; - __u16 queue_mapping; -#ifdef CONFIG_NET_SCHED - __u16 tc_index; /* traffic control index */ -#ifdef CONFIG_NET_CLS_ACT - __u16 tc_verd; /* traffic control verdict */ -#endif -#endif -#ifdef CONFIG_IPV6_NDISC_NODETYPE - __u8 ndisc_nodetype:2; -#endif -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - __u8 do_not_encrypt:1; -#endif - /* 0/13/14 bit hole */ - -#ifdef CONFIG_NET_DMA - dma_cookie_t dma_cookie; -#endif -#ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; -#endif - - __u32 mark; -#define skb_tag mark - - __u16 vlan_tci; - - sk_buff_data_t transport_header; - sk_buff_data_t network_header; - sk_buff_data_t mac_header; - /* These elements must be at the end, see alloc_skb() for details. */ - sk_buff_data_t tail; - sk_buff_data_t end; - unsigned char *head, - *data; - unsigned int truesize; - atomic_t users; -}; - -#ifdef __KERNEL__ -/* - * Handling routines are only of interest to the kernel - */ -#include - -#include - -extern void kfree_skb(struct sk_buff *skb); -extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int fclone, int node); -static inline struct sk_buff *alloc_skb(unsigned int size, - gfp_t priority) -{ - return __alloc_skb(size, priority, 0, -1); -} - -static inline struct sk_buff *alloc_skb_fclone(unsigned int size, - gfp_t priority) -{ - return __alloc_skb(size, priority, 1, -1); -} - -extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); -extern struct sk_buff *skb_clone(struct sk_buff *skb, - gfp_t priority); -extern struct sk_buff *skb_copy(const struct sk_buff *skb, - gfp_t priority); -extern struct sk_buff *pskb_copy(struct sk_buff *skb, - gfp_t gfp_mask); -extern int pskb_expand_head(struct sk_buff *skb, - int nhead, int ntail, - gfp_t gfp_mask); -extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, - unsigned int headroom); -extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, - gfp_t priority); -extern int skb_to_sgvec(struct sk_buff *skb, - struct scatterlist *sg, int offset, - int len); -extern int skb_cow_data(struct sk_buff *skb, int tailbits, - struct sk_buff **trailer); -extern int skb_pad(struct sk_buff *skb, int pad); -#define dev_kfree_skb(a) kfree_skb(a) -extern void skb_over_panic(struct sk_buff *skb, int len, - void *here); -extern void skb_under_panic(struct sk_buff *skb, int len, - void *here); -extern void skb_truesize_bug(struct sk_buff *skb); - -static inline void skb_truesize_check(struct sk_buff *skb) -{ - int len = sizeof(struct sk_buff) + skb->len; - - if (unlikely((int)skb->truesize < len)) - skb_truesize_bug(skb); -} - -extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int getfrag(void *from, char *to, int offset, - int len,int odd, struct sk_buff *skb), - void *from, int length); - -struct skb_seq_state -{ - __u32 lower_offset; - 
__u32 upper_offset; - __u32 frag_idx; - __u32 stepped_offset; - struct sk_buff *root_skb; - struct sk_buff *cur_skb; - __u8 *frag_data; -}; - -extern void skb_prepare_seq_read(struct sk_buff *skb, - unsigned int from, unsigned int to, - struct skb_seq_state *st); -extern unsigned int skb_seq_read(unsigned int consumed, const u8 **data, - struct skb_seq_state *st); -extern void skb_abort_seq_read(struct skb_seq_state *st); - -extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state); - -#ifdef NET_SKBUFF_DATA_USES_OFFSET -static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) -{ - return skb->head + skb->end; -} -#else -static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) -{ - return skb->end; -} -#endif - -/* Internal */ -#define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) - -/** - * skb_queue_empty - check if a queue is empty - * @list: queue head - * - * Returns true if the queue is empty, false otherwise. - */ -static inline int skb_queue_empty(const struct sk_buff_head *list) -{ - return list->next == (struct sk_buff *)list; -} - -/** - * skb_get - reference buffer - * @skb: buffer to reference - * - * Makes another reference to a socket buffer and returns a pointer - * to the buffer. - */ -static inline struct sk_buff *skb_get(struct sk_buff *skb) -{ - atomic_inc(&skb->users); - return skb; -} - -/* - * If users == 1, we are the only owner and are can avoid redundant - * atomic change. - */ - -/** - * skb_cloned - is the buffer a clone - * @skb: buffer to check - * - * Returns true if the buffer was generated with skb_clone() and is - * one of multiple shared copies of the buffer. Cloned buffers are - * shared data so must not be written to under normal circumstances. - */ -static inline int skb_cloned(const struct sk_buff *skb) -{ - return skb->cloned && - (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; -} - -/** - * skb_header_cloned - is the header a clone - * @skb: buffer to check - * - * Returns true if modifying the header part of the buffer requires - * the data to be copied. - */ -static inline int skb_header_cloned(const struct sk_buff *skb) -{ - int dataref; - - if (!skb->cloned) - return 0; - - dataref = atomic_read(&skb_shinfo(skb)->dataref); - dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT); - return dataref != 1; -} - -/** - * skb_header_release - release reference to header - * @skb: buffer to operate on - * - * Drop a reference to the header part of the buffer. This is done - * by acquiring a payload reference. You must not read from the header - * part of skb->data after this. - */ -static inline void skb_header_release(struct sk_buff *skb) -{ - BUG_ON(skb->nohdr); - skb->nohdr = 1; - atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref); -} - -/** - * skb_shared - is the buffer shared - * @skb: buffer to check - * - * Returns true if more than one person has a reference to this - * buffer. - */ -static inline int skb_shared(const struct sk_buff *skb) -{ - return atomic_read(&skb->users) != 1; -} - -/** - * skb_share_check - check if buffer is shared and if so clone it - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the buffer is shared the buffer is cloned and the old copy - * drops a reference. A new clone with a single reference is returned. - * If the buffer is not shared the original buffer is returned. 
When - * being called from interrupt status or with spinlocks held pri must - * be GFP_ATOMIC. - * - * NULL is returned on a memory allocation failure. - */ -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, - gfp_t pri) -{ - might_sleep_if(pri & __GFP_WAIT); - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, pri); - kfree_skb(skb); - skb = nskb; - } - return skb; -} - -/* - * Copy shared buffers into a new sk_buff. We effectively do COW on - * packets to handle cases where we have a local reader and forward - * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. - */ - -/** - * skb_unshare - make a copy of a shared buffer - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the socket buffer is a clone then this function creates a new - * copy of the data, drops a reference count on the old copy and returns - * the new copy with the reference count at 1. If the buffer is not a clone - * the original buffer is returned. When called with a spinlock held or - * from interrupt state @pri must be %GFP_ATOMIC - * - * %NULL is returned on a memory allocation failure. - */ -static inline struct sk_buff *skb_unshare(struct sk_buff *skb, - gfp_t pri) -{ - might_sleep_if(pri & __GFP_WAIT); - if (skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ - skb = nskb; - } - return skb; -} - -/** - * skb_peek - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the head element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ -static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->next; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_peek_tail - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the tail element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ -static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->prev; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_queue_len - get queue length - * @list_: list to measure - * - * Return the length of an &sk_buff queue. - */ -static inline __u32 skb_queue_len(const struct sk_buff_head *list_) -{ - return list_->qlen; -} - -/* - * This function creates a split out lock class for each invocation; - * this is needed for now since a whole lot of users of the skb-queue - * infrastructure in drivers have different locking usage (in hardirq) - * than the networking core (in softirq only). In the long run either the - * network layer or drivers should need annotation to consolidate the - * main types of usage into 3 classes. 
- */ -static inline void skb_queue_head_init(struct sk_buff_head *list) -{ - spin_lock_init(&list->lock); - list->prev = list->next = (struct sk_buff *)list; - list->qlen = 0; -} - -static inline void skb_queue_head_init_class(struct sk_buff_head *list, - struct lock_class_key *class) -{ - skb_queue_head_init(list); - lockdep_set_class(&list->lock, class); -} - -/* - * Insert an sk_buff on a list. - * - * The "__skb_xxxx()" functions are the non-atomic ones that - * can only be called with interrupts disabled. - */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); -static inline void __skb_insert(struct sk_buff *newsk, - struct sk_buff *prev, struct sk_buff *next, - struct sk_buff_head *list) -{ - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; - list->qlen++; -} - -/** - * __skb_queue_after - queue a buffer at the list head - * @list: list to use - * @prev: place after this buffer - * @newsk: buffer to queue - * - * Queue a buffer int the middle of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ -static inline void __skb_queue_after(struct sk_buff_head *list, - struct sk_buff *prev, - struct sk_buff *newsk) -{ - __skb_insert(newsk, prev, prev->next, list); -} - -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, - struct sk_buff_head *list); - -static inline void __skb_queue_before(struct sk_buff_head *list, - struct sk_buff *next, - struct sk_buff *newsk) -{ - __skb_insert(newsk, next->prev, next, list); -} - -/** - * __skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ -extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); -static inline void __skb_queue_head(struct sk_buff_head *list, - struct sk_buff *newsk) -{ - __skb_queue_after(list, (struct sk_buff *)list, newsk); -} - -/** - * __skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the end of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ -extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); -static inline void __skb_queue_tail(struct sk_buff_head *list, - struct sk_buff *newsk) -{ - __skb_queue_before(list, (struct sk_buff *)list, newsk); -} - -/* - * remove sk_buff from list. _Must_ be called atomically, and with - * the list known.. - */ -extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); -static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -{ - struct sk_buff *next, *prev; - - list->qlen--; - next = skb->next; - prev = skb->prev; - skb->next = skb->prev = NULL; - next->prev = prev; - prev->next = next; -} - -/** - * __skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. This function does not take any locks - * so must be used with appropriate locks held only. The head item is - * returned or %NULL if the list is empty. 
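The queue helpers declared above come in locked (skb_queue_tail(), skb_dequeue()) and unlocked double-underscore flavours. A minimal sketch of a driver-private queue using the locked variants follows; all names here are invented.

#include <linux/skbuff.h>

static struct sk_buff_head example_q;		/* hypothetical private queue */

static void example_q_init(void)
{
	skb_queue_head_init(&example_q);	/* also initialises the queue lock */
}

static void example_q_put(struct sk_buff *skb)
{
	skb_queue_tail(&example_q, skb);	/* locked enqueue at the tail */
}

static struct sk_buff *example_q_get(void)
{
	return skb_dequeue(&example_q);		/* locked; NULL when empty */
}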
- */ -extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); -static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek(list); - if (skb) - __skb_unlink(skb, list); - return skb; -} - -/** - * __skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. This function does not take any locks - * so must be used with appropriate locks held only. The tail item is - * returned or %NULL if the list is empty. - */ -extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); -static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek_tail(list); - if (skb) - __skb_unlink(skb, list); - return skb; -} - - -static inline int skb_is_nonlinear(const struct sk_buff *skb) -{ - return skb->data_len; -} - -static inline unsigned int skb_headlen(const struct sk_buff *skb) -{ - return skb->len - skb->data_len; -} - -static inline int skb_pagelen(const struct sk_buff *skb) -{ - int i, len = 0; - - for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) - len += skb_shinfo(skb)->frags[i].size; - return len + skb_headlen(skb); -} - -static inline void skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - frag->page = page; - frag->page_offset = off; - frag->size = size; - skb_shinfo(skb)->nr_frags = i + 1; -} - -#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) -#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) - -#ifdef NET_SKBUFF_DATA_USES_OFFSET -static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) -{ - return skb->head + skb->tail; -} - -static inline void skb_reset_tail_pointer(struct sk_buff *skb) -{ - skb->tail = skb->data - skb->head; -} - -static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) -{ - skb_reset_tail_pointer(skb); - skb->tail += offset; -} -#else /* NET_SKBUFF_DATA_USES_OFFSET */ -static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) -{ - return skb->tail; -} - -static inline void skb_reset_tail_pointer(struct sk_buff *skb) -{ - skb->tail = skb->data; -} - -static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) -{ - skb->tail = skb->data + offset; -} - -#endif /* NET_SKBUFF_DATA_USES_OFFSET */ - -/* - * Add data to an sk_buff - */ -extern unsigned char *skb_put(struct sk_buff *skb, unsigned int len); -static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp = skb_tail_pointer(skb); - SKB_LINEAR_ASSERT(skb); - skb->tail += len; - skb->len += len; - return tmp; -} - -extern unsigned char *skb_push(struct sk_buff *skb, unsigned int len); -static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data -= len; - skb->len += len; - return skb->data; -} - -extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); -static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) -{ - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - return skb->data += len; -} - -extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); - -static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb_headlen(skb) && - !__pskb_pull_tail(skb, len - skb_headlen(skb))) - return NULL; - skb->len -= len; - return skb->data 
+= len; -} - -static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len) -{ - return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); -} - -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) -{ - if (likely(len <= skb_headlen(skb))) - return 1; - if (unlikely(len > skb->len)) - return 0; - return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; -} - -/** - * skb_headroom - bytes at buffer head - * @skb: buffer to check - * - * Return the number of bytes of free space at the head of an &sk_buff. - */ -static inline unsigned int skb_headroom(const struct sk_buff *skb) -{ - return skb->data - skb->head; -} - -/** - * skb_tailroom - bytes at buffer end - * @skb: buffer to check - * - * Return the number of bytes of free space at the tail of an sk_buff - */ -static inline int skb_tailroom(const struct sk_buff *skb) -{ - return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; -} - -/** - * skb_reserve - adjust headroom - * @skb: buffer to alter - * @len: bytes to move - * - * Increase the headroom of an empty &sk_buff by reducing the tail - * room. This is only allowed for an empty buffer. - */ -static inline void skb_reserve(struct sk_buff *skb, int len) -{ - skb->data += len; - skb->tail += len; -} - -#ifdef NET_SKBUFF_DATA_USES_OFFSET -static inline unsigned char *skb_transport_header(const struct sk_buff *skb) -{ - return skb->head + skb->transport_header; -} - -static inline void skb_reset_transport_header(struct sk_buff *skb) -{ - skb->transport_header = skb->data - skb->head; -} - -static inline void skb_set_transport_header(struct sk_buff *skb, - const int offset) -{ - skb_reset_transport_header(skb); - skb->transport_header += offset; -} - -static inline unsigned char *skb_network_header(const struct sk_buff *skb) -{ - return skb->head + skb->network_header; -} - -static inline void skb_reset_network_header(struct sk_buff *skb) -{ - skb->network_header = skb->data - skb->head; -} - -static inline void skb_set_network_header(struct sk_buff *skb, const int offset) -{ - skb_reset_network_header(skb); - skb->network_header += offset; -} - -static inline unsigned char *skb_mac_header(const struct sk_buff *skb) -{ - return skb->head + skb->mac_header; -} - -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != ~0U; -} - -static inline void skb_reset_mac_header(struct sk_buff *skb) -{ - skb->mac_header = skb->data - skb->head; -} - -static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) -{ - skb_reset_mac_header(skb); - skb->mac_header += offset; -} - -#else /* NET_SKBUFF_DATA_USES_OFFSET */ - -static inline unsigned char *skb_transport_header(const struct sk_buff *skb) -{ - return skb->transport_header; -} - -static inline void skb_reset_transport_header(struct sk_buff *skb) -{ - skb->transport_header = skb->data; -} - -static inline void skb_set_transport_header(struct sk_buff *skb, - const int offset) -{ - skb->transport_header = skb->data + offset; -} - -static inline unsigned char *skb_network_header(const struct sk_buff *skb) -{ - return skb->network_header; -} - -static inline void skb_reset_network_header(struct sk_buff *skb) -{ - skb->network_header = skb->data; -} - -static inline void skb_set_network_header(struct sk_buff *skb, const int offset) -{ - skb->network_header = skb->data + offset; -} - -static inline unsigned char *skb_mac_header(const struct sk_buff *skb) -{ - return skb->mac_header; -} - -static inline int skb_mac_header_was_set(const struct sk_buff 
*skb) -{ - return skb->mac_header != NULL; -} - -static inline void skb_reset_mac_header(struct sk_buff *skb) -{ - skb->mac_header = skb->data; -} - -static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) -{ - skb->mac_header = skb->data + offset; -} -#endif /* NET_SKBUFF_DATA_USES_OFFSET */ - -static inline int skb_transport_offset(const struct sk_buff *skb) -{ - return skb_transport_header(skb) - skb->data; -} - -static inline u32 skb_network_header_len(const struct sk_buff *skb) -{ - return skb->transport_header - skb->network_header; -} - -static inline int skb_network_offset(const struct sk_buff *skb) -{ - return skb_network_header(skb) - skb->data; -} - -/* - * CPUs often take a performance hit when accessing unaligned memory - * locations. The actual performance hit varies, it can be small if the - * hardware handles it or large if we have to take an exception and fix it - * in software. - * - * Since an ethernet header is 14 bytes network drivers often end up with - * the IP header at an unaligned offset. The IP header can be aligned by - * shifting the start of the packet by 2 bytes. Drivers should do this - * with: - * - * skb_reserve(NET_IP_ALIGN); - * - * The downside to this alignment of the IP header is that the DMA is now - * unaligned. On some architectures the cost of an unaligned DMA is high - * and this cost outweighs the gains made by aligning the IP header. - * - * Since this trade off varies between architectures, we allow NET_IP_ALIGN - * to be overridden. - */ -#ifndef NET_IP_ALIGN -#define NET_IP_ALIGN 2 -#endif - -/* - * The networking layer reserves some headroom in skb data (via - * dev_alloc_skb). This is used to avoid having to reallocate skb data when - * the header has to grow. In the default case, if the header has to grow - * 16 bytes or less we avoid the reallocation. - * - * Unfortunately this headroom changes the DMA alignment of the resulting - * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive - * on some architectures. An architecture can override this value, - * perhaps setting it to a cacheline in size (since that will maintain - * cacheline alignment of the DMA). It must be a power of 2. - * - * Various parts of the networking layer expect at least 16 bytes of - * headroom, you should not reduce this. - */ -#ifndef NET_SKB_PAD -#define NET_SKB_PAD 16 -#endif - -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); - -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (unlikely(skb->data_len)) { - WARN_ON(1); - return; - } - skb->len = len; - skb_set_tail_pointer(skb, len); -} - -extern void skb_trim(struct sk_buff *skb, unsigned int len); - -static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->data_len) - return ___pskb_trim(skb, len); - __skb_trim(skb, len); - return 0; -} - -static inline int pskb_trim(struct sk_buff *skb, unsigned int len) -{ - return (len < skb->len) ? __pskb_trim(skb, len) : 0; -} - -/** - * pskb_trim_unique - remove end from a paged unique (not cloned) buffer - * @skb: buffer to alter - * @len: new length - * - * This is identical to pskb_trim except that the caller knows that - * the skb is not cloned so we should never get an error due to out- - * of-memory. 
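As the NET_IP_ALIGN comment above explains, receive buffers are usually over-allocated by two bytes and the data pointer shifted so the IP header ends up aligned after the 14-byte ethernet header. A sketch of that pattern follows; the function name and length variable are illustrative.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *example_rx_alloc(struct net_device *dev,
					unsigned int pkt_len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN);

	if (!skb)
		return NULL;
	skb_reserve(skb, NET_IP_ALIGN);	/* IP header lands on a 4-byte boundary */
	return skb;
}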
- */ -static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) -{ - int err = pskb_trim(skb, len); - BUG_ON(err); -} - -/** - * skb_orphan - orphan a buffer - * @skb: buffer to orphan - * - * If a buffer currently has an owner then we call the owner's - * destructor function and make the @skb unowned. The buffer continues - * to exist but is no longer charged to its former owner. - */ -static inline void skb_orphan(struct sk_buff *skb) -{ - if (skb->destructor) - skb->destructor(skb); - skb->destructor = NULL; - skb->sk = NULL; -} - -/** - * __skb_queue_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function does not take the - * list lock and the caller must hold the relevant locks to use it. - */ -extern void skb_queue_purge(struct sk_buff_head *list); -static inline void __skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = __skb_dequeue(list)) != NULL) - kfree_skb(skb); -} - -/** - * __dev_alloc_skb - allocate an skbuff for receiving - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. - */ -static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - gfp_t gfp_mask) -{ - struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); - if (likely(skb)) - skb_reserve(skb, NET_SKB_PAD); - return skb; -} - -extern struct sk_buff *dev_alloc_skb(unsigned int length); - -extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask); - -/** - * netdev_alloc_skb - allocate an skbuff for rx on a specific device - * @dev: network device to receive on - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. Although this function - * allocates memory it can be called from an interrupt. - */ -static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, - unsigned int length) -{ - return __netdev_alloc_skb(dev, length, GFP_ATOMIC); -} - -/** - * skb_clone_writable - is the header of a clone writable - * @skb: buffer to check - * @len: length up to which to write - * - * Returns true if modifying the header part of the cloned buffer - * does not requires the data to be copied. 
- */ -static inline int skb_clone_writable(struct sk_buff *skb, unsigned int len) -{ - return !skb_header_cloned(skb) && - skb_headroom(skb) + len <= skb->hdr_len; -} - -static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, - int cloned) -{ - int delta = 0; - - if (headroom < NET_SKB_PAD) - headroom = NET_SKB_PAD; - if (headroom > skb_headroom(skb)) - delta = headroom - skb_headroom(skb); - - if (delta || cloned) - return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, - GFP_ATOMIC); - return 0; -} - -/** - * skb_cow - copy header of skb when it is required - * @skb: buffer to cow - * @headroom: needed headroom - * - * If the skb passed lacks sufficient headroom or its data part - * is shared, data is reallocated. If reallocation fails, an error - * is returned and original skb is not changed. - * - * The result is skb with writable area skb->head...skb->tail - * and at least @headroom of space at head. - */ -static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) -{ - return __skb_cow(skb, headroom, skb_cloned(skb)); -} - -/** - * skb_cow_head - skb_cow but only making the head writable - * @skb: buffer to cow - * @headroom: needed headroom - * - * This function is identical to skb_cow except that we replace the - * skb_cloned check by skb_header_cloned. It should be used when - * you only need to push on some header and do not need to modify - * the data. - */ -static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) -{ - return __skb_cow(skb, headroom, skb_header_cloned(skb)); -} - -/** - * skb_padto - pad an skbuff up to a minimal size - * @skb: buffer to pad - * @len: minimal length - * - * Pads up a buffer to ensure the trailing bytes exist and are - * blanked. If the buffer already contains sufficient data it - * is untouched. Otherwise it is extended. Returns zero on - * success. The skb is freed on error. - */ - -static inline int skb_padto(struct sk_buff *skb, unsigned int len) -{ - unsigned int size = skb->len; - if (likely(size >= len)) - return 0; - return skb_pad(skb, len - size); -} - -static inline int skb_add_data(struct sk_buff *skb, - char __user *from, int copy) -{ - const int off = skb->len; - - if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), - copy, 0, &err); - if (!err) { - skb->csum = csum_block_add(skb->csum, csum, off); - return 0; - } - } else if (!copy_from_user(skb_put(skb, copy), from, copy)) - return 0; - - __skb_trim(skb, off); - return -EFAULT; -} - -static inline int skb_can_coalesce(struct sk_buff *skb, int i, - struct page *page, int off) -{ - if (i) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; - - return page == frag->page && - off == frag->page_offset + frag->size; - } - return 0; -} - -static inline int __skb_linearize(struct sk_buff *skb) -{ - return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; -} - -/** - * skb_linearize - convert paged skb to linear one - * @skb: buffer to linarize - * - * If there is no free memory -ENOMEM is returned, otherwise zero - * is returned and the old skb data released. - */ -static inline int skb_linearize(struct sk_buff *skb) -{ - return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; -} - -/** - * skb_linearize_cow - make sure skb is linear and writable - * @skb: buffer to process - * - * If there is no free memory -ENOMEM is returned, otherwise zero - * is returned and the old skb data released. 
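skb_cow_head() above is the usual guard before writing a new header into possibly cloned or tight headroom. A minimal sketch of that call pattern, with invented function and length names:

#include <linux/errno.h>
#include <linux/skbuff.h>

static int example_push_header(struct sk_buff *skb, unsigned int hdr_len)
{
	if (skb_cow_head(skb, hdr_len))
		return -ENOMEM;		/* reallocation failed, skb left as-is */
	skb_push(skb, hdr_len);		/* headroom is now private and writable */
	return 0;
}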
- */ -static inline int skb_linearize_cow(struct sk_buff *skb) -{ - return skb_is_nonlinear(skb) || skb_cloned(skb) ? - __skb_linearize(skb) : 0; -} - -/** - * skb_postpull_rcsum - update checksum for received skb after pull - * @skb: buffer to update - * @start: start of data before pull - * @len: length of data pulled - * - * After doing a pull on a received packet, you need to call this to - * update the CHECKSUM_COMPLETE checksum, or set ip_summed to - * CHECKSUM_NONE so that it can be recomputed from scratch. - */ - -static inline void skb_postpull_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); -} - -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); - -/** - * pskb_trim_rcsum - trim received skb and update checksum - * @skb: buffer to trim - * @len: new length - * - * This is exactly the same as pskb_trim except that it ensures the - * checksum of received packets are still valid after the operation. - */ - -static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) -{ - if (likely(len >= skb->len)) - return 0; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - return __pskb_trim(skb, len); -} - -#define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next; \ - prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ - skb = skb->next) - -#define skb_queue_walk_safe(queue, skb, tmp) \ - for (skb = (queue)->next, tmp = skb->next; \ - skb != (struct sk_buff *)(queue); \ - skb = tmp, tmp = skb->next) - -#define skb_queue_reverse_walk(queue, skb) \ - for (skb = (queue)->prev; \ - prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ - skb = skb->prev) - - -extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *err); -extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, - int noblock, int *err); -extern unsigned int datagram_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); -extern int skb_copy_datagram_iovec(const struct sk_buff *from, - int offset, struct iovec *to, - int size); -extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, - int hlen, - struct iovec *iov); -extern int skb_copy_datagram_from_iovec(struct sk_buff *skb, - int offset, - struct iovec *from, - int len); -extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -extern int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, - unsigned int flags); -extern __wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum); -extern int skb_copy_bits(const struct sk_buff *skb, int offset, - void *to, int len); -extern int skb_store_bits(struct sk_buff *skb, int offset, - const void *from, int len); -extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, - int offset, u8 *to, int len, - __wsum csum); -extern int skb_splice_bits(struct sk_buff *skb, - unsigned int offset, - struct pipe_inode_info *pipe, - unsigned int len, - unsigned int flags); -extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); -extern void skb_split(struct sk_buff *skb, - struct sk_buff *skb1, const u32 len); - -extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); - -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) -{ - int hlen = skb_headlen(skb); - - if (hlen - offset >= len) - return skb->data + offset; - - if 
(skb_copy_bits(skb, offset, buffer, len) < 0) - return NULL; - - return buffer; -} - -static inline void skb_copy_from_linear_data(const struct sk_buff *skb, - void *to, - const unsigned int len) -{ - memcpy(to, skb->data, len); -} - -static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, - const int offset, void *to, - const unsigned int len) -{ - memcpy(to, skb->data + offset, len); -} - -static inline void skb_copy_to_linear_data(struct sk_buff *skb, - const void *from, - const unsigned int len) -{ - memcpy(skb->data, from, len); -} - -static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, - const int offset, - const void *from, - const unsigned int len) -{ - memcpy(skb->data + offset, from, len); -} - -extern void skb_init(void); - -/** - * skb_get_timestamp - get timestamp from a skb - * @skb: skb to get stamp from - * @stamp: pointer to struct timeval to store stamp in - * - * Timestamps are stored in the skb as offsets to a base timestamp. - * This function converts the offset back to a struct timeval and stores - * it in stamp. - */ -static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp) -{ - *stamp = ktime_to_timeval(skb->tstamp); -} - -static inline void __net_timestamp(struct sk_buff *skb) -{ - skb->tstamp = ktime_get_real(); -} - -static inline ktime_t net_timedelta(ktime_t t) -{ - return ktime_sub(ktime_get_real(), t); -} - -static inline ktime_t net_invalid_timestamp(void) -{ - return ktime_set(0, 0); -} - -extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); -extern __sum16 __skb_checksum_complete(struct sk_buff *skb); - -static inline int skb_csum_unnecessary(const struct sk_buff *skb) -{ - return skb->ip_summed & CHECKSUM_UNNECESSARY; -} - -/** - * skb_checksum_complete - Calculate checksum of an entire packet - * @skb: packet to process - * - * This function calculates the checksum over the entire packet plus - * the value of skb->csum. The latter can be used to supply the - * checksum of a pseudo header as used by TCP/UDP. It returns the - * checksum. - * - * For protocols that contain complete checksums such as ICMP/TCP/UDP, - * this function can be used to verify that checksum on received - * packets. In that case the function should return zero if the - * checksum is correct. In particular, this function will return zero - * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the - * hardware has already verified the correctness of the checksum. - */ -static inline __sum16 skb_checksum_complete(struct sk_buff *skb) -{ - return skb_csum_unnecessary(skb) ? 
- 0 : __skb_checksum_complete(skb); -} - -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -extern void nf_conntrack_destroy(struct nf_conntrack *nfct); -static inline void nf_conntrack_put(struct nf_conntrack *nfct) -{ - if (nfct && atomic_dec_and_test(&nfct->use)) - nf_conntrack_destroy(nfct); -} -static inline void nf_conntrack_get(struct nf_conntrack *nfct) -{ - if (nfct) - atomic_inc(&nfct->use); -} -static inline void nf_conntrack_get_reasm(struct sk_buff *skb) -{ - if (skb) - atomic_inc(&skb->users); -} -static inline void nf_conntrack_put_reasm(struct sk_buff *skb) -{ - if (skb) - kfree_skb(skb); -} -#endif -#ifdef CONFIG_BRIDGE_NETFILTER -static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) -{ - if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) - kfree(nf_bridge); -} -static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) -{ - if (nf_bridge) - atomic_inc(&nf_bridge->use); -} -#endif /* CONFIG_BRIDGE_NETFILTER */ -static inline void nf_reset(struct sk_buff *skb) -{ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; - nf_conntrack_put_reasm(skb->nfct_reasm); - skb->nfct_reasm = NULL; -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(skb->nf_bridge); - skb->nf_bridge = NULL; -#endif -} - -/* Note: This doesn't put any conntrack and bridge info in dst. */ -static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) -{ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - dst->nfct = src->nfct; - nf_conntrack_get(src->nfct); - dst->nfctinfo = src->nfctinfo; - dst->nfct_reasm = src->nfct_reasm; - nf_conntrack_get_reasm(src->nfct_reasm); -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - dst->nf_bridge = src->nf_bridge; - nf_bridge_get(src->nf_bridge); -#endif -} - -static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) -{ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(dst->nfct); - nf_conntrack_put_reasm(dst->nfct_reasm); -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(dst->nf_bridge); -#endif - __nf_copy(dst, src); -} - -#ifdef CONFIG_NETWORK_SECMARK -static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) -{ - to->secmark = from->secmark; -} - -static inline void skb_init_secmark(struct sk_buff *skb) -{ - skb->secmark = 0; -} -#else -static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) -{ } - -static inline void skb_init_secmark(struct sk_buff *skb) -{ } -#endif - -static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) -{ - skb->queue_mapping = queue_mapping; -} - -static inline u16 skb_get_queue_mapping(struct sk_buff *skb) -{ - return skb->queue_mapping; -} - -static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) -{ - to->queue_mapping = from->queue_mapping; -} - -static inline int skb_is_gso(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->gso_size; -} - -static inline int skb_is_gso_v6(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; -} - -extern void __skb_warn_lro_forwarding(const struct sk_buff *skb); - -static inline bool skb_warn_if_lro(const struct sk_buff *skb) -{ - /* LRO sets gso_size but not gso_type, whereas if GSO is really - * wanted then gso_type will be set. 
*/ - struct skb_shared_info *shinfo = skb_shinfo(skb); - if (shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) { - __skb_warn_lro_forwarding(skb); - return true; - } - return false; -} - -static inline void skb_forward_csum(struct sk_buff *skb) -{ - /* Unfortunately we don't support this one. Any brave souls? */ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; -} - -bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); -#endif /* __KERNEL__ */ -#endif /* _LINUX_SKBUFF_H */ diff -Nurb linux-2.6.27-720/include/linux/spinlock.h linux-2.6.27-710/include/linux/spinlock.h --- linux-2.6.27-720/include/linux/spinlock.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/spinlock.h 2008-10-09 18:13:53.000000000 -0400 @@ -54,7 +54,6 @@ #include #include #include -#include #include diff -Nurb linux-2.6.27-720/include/linux/spinlock_types.h linux-2.6.27-710/include/linux/spinlock_types.h --- linux-2.6.27-720/include/linux/spinlock_types.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/spinlock_types.h 2008-10-09 18:13:53.000000000 -0400 @@ -51,47 +51,37 @@ #define SPINLOCK_OWNER_INIT ((void *)-1L) -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -# define SPINLOCK_BREAK_LOCK_INIT 0, -#else -# define SPINLOCK_BREAK_LOCK_INIT -#endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define SPIN_DEP_MAP_INIT(lockname) { 0, 0, #lockname } +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } #else # define SPIN_DEP_MAP_INIT(lockname) #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define RW_DEP_MAP_INIT(lockname) { 0, 0, #lockname } +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } #else # define RW_DEP_MAP_INIT(lockname) #endif #ifdef CONFIG_DEBUG_SPINLOCK # define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t) { /*raw_lock*/ __RAW_SPIN_LOCK_UNLOCKED, \ - /*break_lock*/ SPINLOCK_BREAK_LOCK_INIT \ - /*magic*/ SPINLOCK_MAGIC, \ - /*owner_cpu*/ -1, \ - /*owner*/ SPINLOCK_OWNER_INIT, \ + (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + .magic = SPINLOCK_MAGIC, \ + .owner = SPINLOCK_OWNER_INIT, \ + .owner_cpu = -1, \ SPIN_DEP_MAP_INIT(lockname) } #define __RW_LOCK_UNLOCKED(lockname) \ - (rwlock_t) { /*raw_lock*/ __RAW_RW_LOCK_UNLOCKED, \ - /*break_lock*/ SPINLOCK_BREAK_LOCK_INIT \ - /*magic*/ RWLOCK_MAGIC, \ - /*owner_cpu*/ -1, \ - /*owner*/ SPINLOCK_OWNER_INIT, \ + (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + .magic = RWLOCK_MAGIC, \ + .owner = SPINLOCK_OWNER_INIT, \ + .owner_cpu = -1, \ RW_DEP_MAP_INIT(lockname) } #else # define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t) { /*raw_lock*/ __RAW_SPIN_LOCK_UNLOCKED, \ - /*break_lock*/ SPINLOCK_BREAK_LOCK_INIT \ + (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ SPIN_DEP_MAP_INIT(lockname) } #define __RW_LOCK_UNLOCKED(lockname) \ - (rwlock_t) { /*raw_lock*/ __RAW_RW_LOCK_UNLOCKED, \ - /*break_lock*/ SPINLOCK_BREAK_LOCK_INIT \ + (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ RW_DEP_MAP_INIT(lockname) } #endif diff -Nurb linux-2.6.27-720/include/linux/stddef.h linux-2.6.27-710/include/linux/stddef.h --- linux-2.6.27-720/include/linux/stddef.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/stddef.h 2008-10-09 18:13:53.000000000 -0400 @@ -12,12 +12,10 @@ #ifdef __KERNEL__ -#ifndef __cplusplus enum { false = 0, true = 1 }; -#endif #undef offsetof #ifdef __compiler_offsetof diff -Nurb linux-2.6.27-720/include/linux/sysctl.h linux-2.6.27-710/include/linux/sysctl.h --- 
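The spinlock_types.h hunk above trades the C99 designated initializers kept in vanilla -710 (.raw_lock = ...) for positional initializers with marker comments on the -720 side, presumably because designated initializers are not accepted by C++ compilers of that era; the stddef.h hunk likewise hides the false/true enum from C++, which has built-in bool. The two initializer styles are shown below on a hypothetical struct; in C they produce identical objects.

struct demo_lock {			/* invented stand-in, not spinlock_t */
	unsigned int raw_lock;
	unsigned int magic;
	void *owner;
	int owner_cpu;
};

/* C99 designated form: order-independent and self-documenting. */
static struct demo_lock demo_a = {
	.raw_lock  = 0,
	.magic     = 0x5a5a5a5a,
	.owner     = (void *)-1L,
	.owner_cpu = -1,
};

/* Positional form with marker comments: members must follow declaration
 * order, but the syntax is also valid C++. */
static struct demo_lock demo_b = {
	/*raw_lock*/  0,
	/*magic*/     0x5a5a5a5a,
	/*owner*/     (void *)-1L,
	/*owner_cpu*/ -1,
};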
linux-2.6.27-720/include/linux/sysctl.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/sysctl.h 2009-05-04 12:15:30.000000000 -0400 @@ -985,7 +985,7 @@ void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -typedef int proc_handler_t (struct ctl_table *ctl, int write, struct file * filp, +typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); extern int proc_dostring(struct ctl_table *, int, struct file *, @@ -1066,7 +1066,7 @@ mode_t mode; struct ctl_table *child; struct ctl_table *parent; /* Automatically set */ - proc_handler_t *proc_handler; /* Callback for text formatting */ + proc_handler *proc_handler; /* Callback for text formatting */ ctl_handler *strategy; /* Callback function for all r/w */ void *extra1; void *extra2; diff -Nurb linux-2.6.27-720/include/linux/sysctl.h.orig linux-2.6.27-710/include/linux/sysctl.h.orig --- linux-2.6.27-720/include/linux/sysctl.h.orig 2009-05-04 12:15:30.000000000 -0400 +++ linux-2.6.27-710/include/linux/sysctl.h.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,1120 +0,0 @@ -/* - * sysctl.h: General linux system control interface - * - * Begun 24 March 1995, Stephen Tweedie - * - **************************************************************** - **************************************************************** - ** - ** WARNING: - ** The values in this file are exported to user space via - ** the sysctl() binary interface. Do *NOT* change the - ** numbering of any existing values here, and do not change - ** any numbers within any one set of values. If you have to - ** redefine an existing interface, use a new number for it. - ** The kernel will then return -ENOTDIR to any application using - ** the old binary interface. - ** - ** For new interfaces unless you really need a binary number - ** please use CTL_UNNUMBERED. - ** - **************************************************************** - **************************************************************** - */ - -#ifndef _LINUX_SYSCTL_H -#define _LINUX_SYSCTL_H - -#include -#include -#include - -struct file; -struct completion; - -#define CTL_MAXNAME 10 /* how many path components do we allow in a - call to sysctl? In other words, what is - the largest acceptable value for the nlen - member of a struct __sysctl_args to have? 
*/ - -struct __sysctl_args { - int __user *name; - int nlen; - void __user *oldval; - size_t __user *oldlenp; - void __user *newval; - size_t newlen; - unsigned long __unused[4]; -}; - -/* Define sysctl names first */ - -/* Top-level names: */ - -/* For internal pattern-matching use only: */ -#ifdef __KERNEL__ -#define CTL_NONE 0 -#define CTL_UNNUMBERED CTL_NONE /* sysctl without a binary number */ -#endif - -enum -{ - CTL_KERN=1, /* General kernel info and control */ - CTL_VM=2, /* VM management */ - CTL_NET=3, /* Networking */ - CTL_PROC=4, /* removal breaks strace(1) compilation */ - CTL_FS=5, /* Filesystems */ - CTL_DEBUG=6, /* Debugging */ - CTL_DEV=7, /* Devices */ - CTL_BUS=8, /* Busses */ - CTL_ABI=9, /* Binary emulation */ - CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ - CTL_ARLAN=254, /* arlan wireless driver */ - CTL_VSERVER=4242, /* Linux-VServer debug */ - CTL_S390DBF=5677, /* s390 debug */ - CTL_SUNRPC=7249, /* sunrpc debug */ - CTL_PM=9899, /* frv power management */ - CTL_FRV=9898, /* frv specific sysctls */ -}; - -/* CTL_BUS names: */ -enum -{ - CTL_BUS_ISA=1 /* ISA */ -}; - -/* /proc/sys/fs/inotify/ */ -enum -{ - INOTIFY_MAX_USER_INSTANCES=1, /* max instances per user */ - INOTIFY_MAX_USER_WATCHES=2, /* max watches per user */ - INOTIFY_MAX_QUEUED_EVENTS=3 /* max queued events per instance */ -}; - -/* CTL_KERN names: */ -enum -{ - KERN_OSTYPE=1, /* string: system version */ - KERN_OSRELEASE=2, /* string: system release */ - KERN_OSREV=3, /* int: system revision */ - KERN_VERSION=4, /* string: compile time info */ - KERN_SECUREMASK=5, /* struct: maximum rights mask */ - KERN_PROF=6, /* table: profiling information */ - KERN_NODENAME=7, - KERN_DOMAINNAME=8, - - KERN_PANIC=15, /* int: panic timeout */ - KERN_REALROOTDEV=16, /* real root device to mount after initrd */ - KERN_VSHELPER=17, /* string: path to vshelper policy agent */ - - KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ - KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ - KERN_PRINTK=23, /* struct: control printk logging parameters */ - KERN_NAMETRANS=24, /* Name translation */ - KERN_PPC_HTABRECLAIM=25, /* turn htab reclaimation on/off on PPC */ - KERN_PPC_ZEROPAGED=26, /* turn idle page zeroing on/off on PPC */ - KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */ - KERN_MODPROBE=28, - KERN_SG_BIG_BUFF=29, - KERN_ACCT=30, /* BSD process accounting parameters */ - KERN_PPC_L2CR=31, /* l2cr register on PPC */ - - KERN_RTSIGNR=32, /* Number of rt sigs queued */ - KERN_RTSIGMAX=33, /* Max queuable */ - - KERN_SHMMAX=34, /* long: Maximum shared memory segment */ - KERN_MSGMAX=35, /* int: Maximum size of a messege */ - KERN_MSGMNB=36, /* int: Maximum message queue size */ - KERN_MSGPOOL=37, /* int: Maximum system message pool size */ - KERN_SYSRQ=38, /* int: Sysreq enable */ - KERN_MAX_THREADS=39, /* int: Maximum nr of threads in the system */ - KERN_RANDOM=40, /* Random driver */ - KERN_SHMALL=41, /* int: Maximum size of shared memory */ - KERN_MSGMNI=42, /* int: msg queue identifiers */ - KERN_SEM=43, /* struct: sysv semaphore limits */ - KERN_SPARC_STOP_A=44, /* int: Sparc Stop-A enable */ - KERN_SHMMNI=45, /* int: shm array identifiers */ - KERN_OVERFLOWUID=46, /* int: overflow UID */ - KERN_OVERFLOWGID=47, /* int: overflow GID */ - KERN_SHMPATH=48, /* string: path to shm fs */ - KERN_HOTPLUG=49, /* string: path to uevent helper (deprecated) */ - KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */ - KERN_S390_USER_DEBUG_LOGGING=51, /* int: dumps of user faults 
*/ - KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ - KERN_TAINTED=53, /* int: various kernel tainted flags */ - KERN_CADPID=54, /* int: PID of the process to notify on CAD */ - KERN_PIDMAX=55, /* int: PID # limit */ - KERN_CORE_PATTERN=56, /* string: pattern for core-file names */ - KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ - KERN_HPPA_PWRSW=58, /* int: hppa soft-power enable */ - KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */ - KERN_PRINTK_RATELIMIT=60, /* int: tune printk ratelimiting */ - KERN_PRINTK_RATELIMIT_BURST=61, /* int: tune printk ratelimiting */ - KERN_PTY=62, /* dir: pty driver */ - KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */ - KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ - KERN_HZ_TIMER=65, /* int: hz timer on or off */ - KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */ - KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */ - KERN_RANDOMIZE=68, /* int: randomize virtual address space */ - KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ - KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ - KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ - KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ - KERN_COMPAT_LOG=73, /* int: print compat layer messages */ - KERN_MAX_LOCK_DEPTH=74, - KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ - KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ -}; - - - -/* CTL_VM names: */ -enum -{ - VM_UNUSED1=1, /* was: struct: Set vm swapping control */ - VM_UNUSED2=2, /* was; int: Linear or sqrt() swapout for hogs */ - VM_UNUSED3=3, /* was: struct: Set free page thresholds */ - VM_UNUSED4=4, /* Spare */ - VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */ - VM_UNUSED5=6, /* was: struct: Set buffer memory thresholds */ - VM_UNUSED7=7, /* was: struct: Set cache memory thresholds */ - VM_UNUSED8=8, /* was: struct: Control kswapd behaviour */ - VM_UNUSED9=9, /* was: struct: Set page table cache parameters */ - VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ - VM_DIRTY_BACKGROUND=11, /* dirty_background_ratio */ - VM_DIRTY_RATIO=12, /* dirty_ratio */ - VM_DIRTY_WB_CS=13, /* dirty_writeback_centisecs */ - VM_DIRTY_EXPIRE_CS=14, /* dirty_expire_centisecs */ - VM_NR_PDFLUSH_THREADS=15, /* nr_pdflush_threads */ - VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */ - VM_PAGEBUF=17, /* struct: Control pagebuf parameters */ - VM_HUGETLB_PAGES=18, /* int: Number of available Huge Pages */ - VM_SWAPPINESS=19, /* Tendency to steal mapped memory */ - VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */ - VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ - VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ - VM_LAPTOP_MODE=23, /* vm laptop mode */ - VM_BLOCK_DUMP=24, /* block dump mode */ - VM_HUGETLB_GROUP=25, /* permitted hugetlb group */ - VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ - VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ - VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ - VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ - VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ - VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ - VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ - VM_PANIC_ON_OOM=33, /* panic at 
out-of-memory */ - VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ - VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ -}; - - -/* CTL_NET names: */ -enum -{ - NET_CORE=1, - NET_ETHER=2, - NET_802=3, - NET_UNIX=4, - NET_IPV4=5, - NET_IPX=6, - NET_ATALK=7, - NET_NETROM=8, - NET_AX25=9, - NET_BRIDGE=10, - NET_ROSE=11, - NET_IPV6=12, - NET_X25=13, - NET_TR=14, - NET_DECNET=15, - NET_ECONET=16, - NET_SCTP=17, - NET_LLC=18, - NET_NETFILTER=19, - NET_DCCP=20, - NET_IRDA=412, -}; - -/* /proc/sys/kernel/random */ -enum -{ - RANDOM_POOLSIZE=1, - RANDOM_ENTROPY_COUNT=2, - RANDOM_READ_THRESH=3, - RANDOM_WRITE_THRESH=4, - RANDOM_BOOT_ID=5, - RANDOM_UUID=6 -}; - -/* /proc/sys/kernel/pty */ -enum -{ - PTY_MAX=1, - PTY_NR=2 -}; - -/* /proc/sys/bus/isa */ -enum -{ - BUS_ISA_MEM_BASE=1, - BUS_ISA_PORT_BASE=2, - BUS_ISA_PORT_SHIFT=3 -}; - -/* /proc/sys/net/core */ -enum -{ - NET_CORE_WMEM_MAX=1, - NET_CORE_RMEM_MAX=2, - NET_CORE_WMEM_DEFAULT=3, - NET_CORE_RMEM_DEFAULT=4, -/* was NET_CORE_DESTROY_DELAY */ - NET_CORE_MAX_BACKLOG=6, - NET_CORE_FASTROUTE=7, - NET_CORE_MSG_COST=8, - NET_CORE_MSG_BURST=9, - NET_CORE_OPTMEM_MAX=10, - NET_CORE_HOT_LIST_LENGTH=11, - NET_CORE_DIVERT_VERSION=12, - NET_CORE_NO_CONG_THRESH=13, - NET_CORE_NO_CONG=14, - NET_CORE_LO_CONG=15, - NET_CORE_MOD_CONG=16, - NET_CORE_DEV_WEIGHT=17, - NET_CORE_SOMAXCONN=18, - NET_CORE_BUDGET=19, - NET_CORE_AEVENT_ETIME=20, - NET_CORE_AEVENT_RSEQTH=21, - NET_CORE_WARNINGS=22, -}; - -/* /proc/sys/net/ethernet */ - -/* /proc/sys/net/802 */ - -/* /proc/sys/net/unix */ - -enum -{ - NET_UNIX_DESTROY_DELAY=1, - NET_UNIX_DELETE_DELAY=2, - NET_UNIX_MAX_DGRAM_QLEN=3, -}; - -/* /proc/sys/net/netfilter */ -enum -{ - NET_NF_CONNTRACK_MAX=1, - NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, - NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, - NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, - NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, - NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, - NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, - NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, - NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, - NET_NF_CONNTRACK_UDP_TIMEOUT=10, - NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, - NET_NF_CONNTRACK_ICMP_TIMEOUT=12, - NET_NF_CONNTRACK_GENERIC_TIMEOUT=13, - NET_NF_CONNTRACK_BUCKETS=14, - NET_NF_CONNTRACK_LOG_INVALID=15, - NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, - NET_NF_CONNTRACK_TCP_LOOSE=17, - NET_NF_CONNTRACK_TCP_BE_LIBERAL=18, - NET_NF_CONNTRACK_TCP_MAX_RETRANS=19, - NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, - NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, - NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, - NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, - NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, - NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, - NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, - NET_NF_CONNTRACK_COUNT=27, - NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28, - NET_NF_CONNTRACK_FRAG6_TIMEOUT=29, - NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30, - NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31, - NET_NF_CONNTRACK_CHECKSUM=32, -}; - -/* /proc/sys/net/ipv4 */ -enum -{ - /* v2.0 compatibile variables */ - NET_IPV4_FORWARD=8, - NET_IPV4_DYNADDR=9, - - NET_IPV4_CONF=16, - NET_IPV4_NEIGH=17, - NET_IPV4_ROUTE=18, - NET_IPV4_FIB_HASH=19, - NET_IPV4_NETFILTER=20, - - NET_IPV4_TCP_TIMESTAMPS=33, - NET_IPV4_TCP_WINDOW_SCALING=34, - NET_IPV4_TCP_SACK=35, - NET_IPV4_TCP_RETRANS_COLLAPSE=36, - NET_IPV4_DEFAULT_TTL=37, - NET_IPV4_AUTOCONFIG=38, - NET_IPV4_NO_PMTU_DISC=39, - NET_IPV4_TCP_SYN_RETRIES=40, - NET_IPV4_IPFRAG_HIGH_THRESH=41, - NET_IPV4_IPFRAG_LOW_THRESH=42, - 
NET_IPV4_IPFRAG_TIME=43, - NET_IPV4_TCP_MAX_KA_PROBES=44, - NET_IPV4_TCP_KEEPALIVE_TIME=45, - NET_IPV4_TCP_KEEPALIVE_PROBES=46, - NET_IPV4_TCP_RETRIES1=47, - NET_IPV4_TCP_RETRIES2=48, - NET_IPV4_TCP_FIN_TIMEOUT=49, - NET_IPV4_IP_MASQ_DEBUG=50, - NET_TCP_SYNCOOKIES=51, - NET_TCP_STDURG=52, - NET_TCP_RFC1337=53, - NET_TCP_SYN_TAILDROP=54, - NET_TCP_MAX_SYN_BACKLOG=55, - NET_IPV4_LOCAL_PORT_RANGE=56, - NET_IPV4_ICMP_ECHO_IGNORE_ALL=57, - NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS=58, - NET_IPV4_ICMP_SOURCEQUENCH_RATE=59, - NET_IPV4_ICMP_DESTUNREACH_RATE=60, - NET_IPV4_ICMP_TIMEEXCEED_RATE=61, - NET_IPV4_ICMP_PARAMPROB_RATE=62, - NET_IPV4_ICMP_ECHOREPLY_RATE=63, - NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES=64, - NET_IPV4_IGMP_MAX_MEMBERSHIPS=65, - NET_TCP_TW_RECYCLE=66, - NET_IPV4_ALWAYS_DEFRAG=67, - NET_IPV4_TCP_KEEPALIVE_INTVL=68, - NET_IPV4_INET_PEER_THRESHOLD=69, - NET_IPV4_INET_PEER_MINTTL=70, - NET_IPV4_INET_PEER_MAXTTL=71, - NET_IPV4_INET_PEER_GC_MINTIME=72, - NET_IPV4_INET_PEER_GC_MAXTIME=73, - NET_TCP_ORPHAN_RETRIES=74, - NET_TCP_ABORT_ON_OVERFLOW=75, - NET_TCP_SYNACK_RETRIES=76, - NET_TCP_MAX_ORPHANS=77, - NET_TCP_MAX_TW_BUCKETS=78, - NET_TCP_FACK=79, - NET_TCP_REORDERING=80, - NET_TCP_ECN=81, - NET_TCP_DSACK=82, - NET_TCP_MEM=83, - NET_TCP_WMEM=84, - NET_TCP_RMEM=85, - NET_TCP_APP_WIN=86, - NET_TCP_ADV_WIN_SCALE=87, - NET_IPV4_NONLOCAL_BIND=88, - NET_IPV4_ICMP_RATELIMIT=89, - NET_IPV4_ICMP_RATEMASK=90, - NET_TCP_TW_REUSE=91, - NET_TCP_FRTO=92, - NET_TCP_LOW_LATENCY=93, - NET_IPV4_IPFRAG_SECRET_INTERVAL=94, - NET_IPV4_IGMP_MAX_MSF=96, - NET_TCP_NO_METRICS_SAVE=97, - NET_TCP_DEFAULT_WIN_SCALE=105, - NET_TCP_MODERATE_RCVBUF=106, - NET_TCP_TSO_WIN_DIVISOR=107, - NET_TCP_BIC_BETA=108, - NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, - NET_TCP_CONG_CONTROL=110, - NET_TCP_ABC=111, - NET_IPV4_IPFRAG_MAX_DIST=112, - NET_TCP_MTU_PROBING=113, - NET_TCP_BASE_MSS=114, - NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, - NET_TCP_DMA_COPYBREAK=116, - NET_TCP_SLOW_START_AFTER_IDLE=117, - NET_CIPSOV4_CACHE_ENABLE=118, - NET_CIPSOV4_CACHE_BUCKET_SIZE=119, - NET_CIPSOV4_RBM_OPTFMT=120, - NET_CIPSOV4_RBM_STRICTVALID=121, - NET_TCP_AVAIL_CONG_CONTROL=122, - NET_TCP_ALLOWED_CONG_CONTROL=123, - NET_TCP_MAX_SSTHRESH=124, - NET_TCP_FRTO_RESPONSE=125, -#ifdef CONFIG_ICMP_IPOD - NET_IPV4_ICMP_IPOD_VERSION, - NET_IPV4_ICMP_IPOD_ENABLED, - NET_IPV4_ICMP_IPOD_HOST, - NET_IPV4_ICMP_IPOD_MASK, - NET_IPV4_ICMP_IPOD_KEY -#endif -}; - -enum { - NET_IPV4_ROUTE_FLUSH=1, - NET_IPV4_ROUTE_MIN_DELAY=2, /* obsolete since 2.6.25 */ - NET_IPV4_ROUTE_MAX_DELAY=3, /* obsolete since 2.6.25 */ - NET_IPV4_ROUTE_GC_THRESH=4, - NET_IPV4_ROUTE_MAX_SIZE=5, - NET_IPV4_ROUTE_GC_MIN_INTERVAL=6, - NET_IPV4_ROUTE_GC_TIMEOUT=7, - NET_IPV4_ROUTE_GC_INTERVAL=8, - NET_IPV4_ROUTE_REDIRECT_LOAD=9, - NET_IPV4_ROUTE_REDIRECT_NUMBER=10, - NET_IPV4_ROUTE_REDIRECT_SILENCE=11, - NET_IPV4_ROUTE_ERROR_COST=12, - NET_IPV4_ROUTE_ERROR_BURST=13, - NET_IPV4_ROUTE_GC_ELASTICITY=14, - NET_IPV4_ROUTE_MTU_EXPIRES=15, - NET_IPV4_ROUTE_MIN_PMTU=16, - NET_IPV4_ROUTE_MIN_ADVMSS=17, - NET_IPV4_ROUTE_SECRET_INTERVAL=18, - NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS=19, -}; - -enum -{ - NET_PROTO_CONF_ALL=-2, - NET_PROTO_CONF_DEFAULT=-3 - - /* And device ifindices ... 
*/ -}; - -enum -{ - NET_IPV4_CONF_FORWARDING=1, - NET_IPV4_CONF_MC_FORWARDING=2, - NET_IPV4_CONF_PROXY_ARP=3, - NET_IPV4_CONF_ACCEPT_REDIRECTS=4, - NET_IPV4_CONF_SECURE_REDIRECTS=5, - NET_IPV4_CONF_SEND_REDIRECTS=6, - NET_IPV4_CONF_SHARED_MEDIA=7, - NET_IPV4_CONF_RP_FILTER=8, - NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE=9, - NET_IPV4_CONF_BOOTP_RELAY=10, - NET_IPV4_CONF_LOG_MARTIANS=11, - NET_IPV4_CONF_TAG=12, - NET_IPV4_CONF_ARPFILTER=13, - NET_IPV4_CONF_MEDIUM_ID=14, - NET_IPV4_CONF_NOXFRM=15, - NET_IPV4_CONF_NOPOLICY=16, - NET_IPV4_CONF_FORCE_IGMP_VERSION=17, - NET_IPV4_CONF_ARP_ANNOUNCE=18, - NET_IPV4_CONF_ARP_IGNORE=19, - NET_IPV4_CONF_PROMOTE_SECONDARIES=20, - NET_IPV4_CONF_ARP_ACCEPT=21, - __NET_IPV4_CONF_MAX -}; - -/* /proc/sys/net/ipv4/netfilter */ -enum -{ - NET_IPV4_NF_CONNTRACK_MAX=1, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, - NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT=10, - NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, - NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12, - NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13, - NET_IPV4_NF_CONNTRACK_BUCKETS=14, - NET_IPV4_NF_CONNTRACK_LOG_INVALID=15, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, - NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17, - NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18, - NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, - NET_IPV4_NF_CONNTRACK_COUNT=27, - NET_IPV4_NF_CONNTRACK_CHECKSUM=28, -}; - -/* /proc/sys/net/ipv6 */ -enum { - NET_IPV6_CONF=16, - NET_IPV6_NEIGH=17, - NET_IPV6_ROUTE=18, - NET_IPV6_ICMP=19, - NET_IPV6_BINDV6ONLY=20, - NET_IPV6_IP6FRAG_HIGH_THRESH=21, - NET_IPV6_IP6FRAG_LOW_THRESH=22, - NET_IPV6_IP6FRAG_TIME=23, - NET_IPV6_IP6FRAG_SECRET_INTERVAL=24, - NET_IPV6_MLD_MAX_MSF=25, -}; - -enum { - NET_IPV6_ROUTE_FLUSH=1, - NET_IPV6_ROUTE_GC_THRESH=2, - NET_IPV6_ROUTE_MAX_SIZE=3, - NET_IPV6_ROUTE_GC_MIN_INTERVAL=4, - NET_IPV6_ROUTE_GC_TIMEOUT=5, - NET_IPV6_ROUTE_GC_INTERVAL=6, - NET_IPV6_ROUTE_GC_ELASTICITY=7, - NET_IPV6_ROUTE_MTU_EXPIRES=8, - NET_IPV6_ROUTE_MIN_ADVMSS=9, - NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10 -}; - -enum { - NET_IPV6_FORWARDING=1, - NET_IPV6_HOP_LIMIT=2, - NET_IPV6_MTU=3, - NET_IPV6_ACCEPT_RA=4, - NET_IPV6_ACCEPT_REDIRECTS=5, - NET_IPV6_AUTOCONF=6, - NET_IPV6_DAD_TRANSMITS=7, - NET_IPV6_RTR_SOLICITS=8, - NET_IPV6_RTR_SOLICIT_INTERVAL=9, - NET_IPV6_RTR_SOLICIT_DELAY=10, - NET_IPV6_USE_TEMPADDR=11, - NET_IPV6_TEMP_VALID_LFT=12, - NET_IPV6_TEMP_PREFERED_LFT=13, - NET_IPV6_REGEN_MAX_RETRY=14, - NET_IPV6_MAX_DESYNC_FACTOR=15, - NET_IPV6_MAX_ADDRESSES=16, - NET_IPV6_FORCE_MLD_VERSION=17, - NET_IPV6_ACCEPT_RA_DEFRTR=18, - NET_IPV6_ACCEPT_RA_PINFO=19, - NET_IPV6_ACCEPT_RA_RTR_PREF=20, - NET_IPV6_RTR_PROBE_INTERVAL=21, - NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, - NET_IPV6_PROXY_NDP=23, - NET_IPV6_ACCEPT_SOURCE_ROUTE=25, - __NET_IPV6_MAX -}; - -/* /proc/sys/net/ipv6/icmp */ -enum { - NET_IPV6_ICMP_RATELIMIT=1 -}; - -/* 
/proc/sys/net//neigh/ */ -enum { - NET_NEIGH_MCAST_SOLICIT=1, - NET_NEIGH_UCAST_SOLICIT=2, - NET_NEIGH_APP_SOLICIT=3, - NET_NEIGH_RETRANS_TIME=4, - NET_NEIGH_REACHABLE_TIME=5, - NET_NEIGH_DELAY_PROBE_TIME=6, - NET_NEIGH_GC_STALE_TIME=7, - NET_NEIGH_UNRES_QLEN=8, - NET_NEIGH_PROXY_QLEN=9, - NET_NEIGH_ANYCAST_DELAY=10, - NET_NEIGH_PROXY_DELAY=11, - NET_NEIGH_LOCKTIME=12, - NET_NEIGH_GC_INTERVAL=13, - NET_NEIGH_GC_THRESH1=14, - NET_NEIGH_GC_THRESH2=15, - NET_NEIGH_GC_THRESH3=16, - NET_NEIGH_RETRANS_TIME_MS=17, - NET_NEIGH_REACHABLE_TIME_MS=18, - __NET_NEIGH_MAX -}; - -/* /proc/sys/net/dccp */ -enum { - NET_DCCP_DEFAULT=1, -}; - -/* /proc/sys/net/ipx */ -enum { - NET_IPX_PPROP_BROADCASTING=1, - NET_IPX_FORWARDING=2 -}; - -/* /proc/sys/net/llc */ -enum { - NET_LLC2=1, - NET_LLC_STATION=2, -}; - -/* /proc/sys/net/llc/llc2 */ -enum { - NET_LLC2_TIMEOUT=1, -}; - -/* /proc/sys/net/llc/station */ -enum { - NET_LLC_STATION_ACK_TIMEOUT=1, -}; - -/* /proc/sys/net/llc/llc2/timeout */ -enum { - NET_LLC2_ACK_TIMEOUT=1, - NET_LLC2_P_TIMEOUT=2, - NET_LLC2_REJ_TIMEOUT=3, - NET_LLC2_BUSY_TIMEOUT=4, -}; - -/* /proc/sys/net/appletalk */ -enum { - NET_ATALK_AARP_EXPIRY_TIME=1, - NET_ATALK_AARP_TICK_TIME=2, - NET_ATALK_AARP_RETRANSMIT_LIMIT=3, - NET_ATALK_AARP_RESOLVE_TIME=4 -}; - - -/* /proc/sys/net/netrom */ -enum { - NET_NETROM_DEFAULT_PATH_QUALITY=1, - NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER=2, - NET_NETROM_NETWORK_TTL_INITIALISER=3, - NET_NETROM_TRANSPORT_TIMEOUT=4, - NET_NETROM_TRANSPORT_MAXIMUM_TRIES=5, - NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY=6, - NET_NETROM_TRANSPORT_BUSY_DELAY=7, - NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE=8, - NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT=9, - NET_NETROM_ROUTING_CONTROL=10, - NET_NETROM_LINK_FAILS_COUNT=11, - NET_NETROM_RESET=12 -}; - -/* /proc/sys/net/ax25 */ -enum { - NET_AX25_IP_DEFAULT_MODE=1, - NET_AX25_DEFAULT_MODE=2, - NET_AX25_BACKOFF_TYPE=3, - NET_AX25_CONNECT_MODE=4, - NET_AX25_STANDARD_WINDOW=5, - NET_AX25_EXTENDED_WINDOW=6, - NET_AX25_T1_TIMEOUT=7, - NET_AX25_T2_TIMEOUT=8, - NET_AX25_T3_TIMEOUT=9, - NET_AX25_IDLE_TIMEOUT=10, - NET_AX25_N2=11, - NET_AX25_PACLEN=12, - NET_AX25_PROTOCOL=13, - NET_AX25_DAMA_SLAVE_TIMEOUT=14 -}; - -/* /proc/sys/net/rose */ -enum { - NET_ROSE_RESTART_REQUEST_TIMEOUT=1, - NET_ROSE_CALL_REQUEST_TIMEOUT=2, - NET_ROSE_RESET_REQUEST_TIMEOUT=3, - NET_ROSE_CLEAR_REQUEST_TIMEOUT=4, - NET_ROSE_ACK_HOLD_BACK_TIMEOUT=5, - NET_ROSE_ROUTING_CONTROL=6, - NET_ROSE_LINK_FAIL_TIMEOUT=7, - NET_ROSE_MAX_VCS=8, - NET_ROSE_WINDOW_SIZE=9, - NET_ROSE_NO_ACTIVITY_TIMEOUT=10 -}; - -/* /proc/sys/net/x25 */ -enum { - NET_X25_RESTART_REQUEST_TIMEOUT=1, - NET_X25_CALL_REQUEST_TIMEOUT=2, - NET_X25_RESET_REQUEST_TIMEOUT=3, - NET_X25_CLEAR_REQUEST_TIMEOUT=4, - NET_X25_ACK_HOLD_BACK_TIMEOUT=5, - NET_X25_FORWARD=6 -}; - -/* /proc/sys/net/token-ring */ -enum -{ - NET_TR_RIF_TIMEOUT=1 -}; - -/* /proc/sys/net/decnet/ */ -enum { - NET_DECNET_NODE_TYPE = 1, - NET_DECNET_NODE_ADDRESS = 2, - NET_DECNET_NODE_NAME = 3, - NET_DECNET_DEFAULT_DEVICE = 4, - NET_DECNET_TIME_WAIT = 5, - NET_DECNET_DN_COUNT = 6, - NET_DECNET_DI_COUNT = 7, - NET_DECNET_DR_COUNT = 8, - NET_DECNET_DST_GC_INTERVAL = 9, - NET_DECNET_CONF = 10, - NET_DECNET_NO_FC_MAX_CWND = 11, - NET_DECNET_MEM = 12, - NET_DECNET_RMEM = 13, - NET_DECNET_WMEM = 14, - NET_DECNET_DEBUG_LEVEL = 255 -}; - -/* /proc/sys/net/decnet/conf/ */ -enum { - NET_DECNET_CONF_LOOPBACK = -2, - NET_DECNET_CONF_DDCMP = -3, - NET_DECNET_CONF_PPP = -4, - NET_DECNET_CONF_X25 = -5, - NET_DECNET_CONF_GRE = -6, - NET_DECNET_CONF_ETHER = -7 - - 
/* ... and ifindex of devices */ -}; - -/* /proc/sys/net/decnet/conf// */ -enum { - NET_DECNET_CONF_DEV_PRIORITY = 1, - NET_DECNET_CONF_DEV_T1 = 2, - NET_DECNET_CONF_DEV_T2 = 3, - NET_DECNET_CONF_DEV_T3 = 4, - NET_DECNET_CONF_DEV_FORWARDING = 5, - NET_DECNET_CONF_DEV_BLKSIZE = 6, - NET_DECNET_CONF_DEV_STATE = 7 -}; - -/* /proc/sys/net/sctp */ -enum { - NET_SCTP_RTO_INITIAL = 1, - NET_SCTP_RTO_MIN = 2, - NET_SCTP_RTO_MAX = 3, - NET_SCTP_RTO_ALPHA = 4, - NET_SCTP_RTO_BETA = 5, - NET_SCTP_VALID_COOKIE_LIFE = 6, - NET_SCTP_ASSOCIATION_MAX_RETRANS = 7, - NET_SCTP_PATH_MAX_RETRANS = 8, - NET_SCTP_MAX_INIT_RETRANSMITS = 9, - NET_SCTP_HB_INTERVAL = 10, - NET_SCTP_PRESERVE_ENABLE = 11, - NET_SCTP_MAX_BURST = 12, - NET_SCTP_ADDIP_ENABLE = 13, - NET_SCTP_PRSCTP_ENABLE = 14, - NET_SCTP_SNDBUF_POLICY = 15, - NET_SCTP_SACK_TIMEOUT = 16, - NET_SCTP_RCVBUF_POLICY = 17, -}; - -/* /proc/sys/net/bridge */ -enum { - NET_BRIDGE_NF_CALL_ARPTABLES = 1, - NET_BRIDGE_NF_CALL_IPTABLES = 2, - NET_BRIDGE_NF_CALL_IP6TABLES = 3, - NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4, - NET_BRIDGE_NF_FILTER_PPPOE_TAGGED = 5, -}; - -/* proc/sys/net/irda */ -enum { - NET_IRDA_DISCOVERY=1, - NET_IRDA_DEVNAME=2, - NET_IRDA_DEBUG=3, - NET_IRDA_FAST_POLL=4, - NET_IRDA_DISCOVERY_SLOTS=5, - NET_IRDA_DISCOVERY_TIMEOUT=6, - NET_IRDA_SLOT_TIMEOUT=7, - NET_IRDA_MAX_BAUD_RATE=8, - NET_IRDA_MIN_TX_TURN_TIME=9, - NET_IRDA_MAX_TX_DATA_SIZE=10, - NET_IRDA_MAX_TX_WINDOW=11, - NET_IRDA_MAX_NOREPLY_TIME=12, - NET_IRDA_WARN_NOREPLY_TIME=13, - NET_IRDA_LAP_KEEPALIVE_TIME=14, -}; - - -/* CTL_FS names: */ -enum -{ - FS_NRINODE=1, /* int:current number of allocated inodes */ - FS_STATINODE=2, - FS_MAXINODE=3, /* int:maximum number of inodes that can be allocated */ - FS_NRDQUOT=4, /* int:current number of allocated dquots */ - FS_MAXDQUOT=5, /* int:maximum number of dquots that can be allocated */ - FS_NRFILE=6, /* int:current number of allocated filedescriptors */ - FS_MAXFILE=7, /* int:maximum number of filedescriptors that can be allocated */ - FS_DENTRY=8, - FS_NRSUPER=9, /* int:current number of allocated super_blocks */ - FS_MAXSUPER=10, /* int:maximum number of super_blocks that can be allocated */ - FS_OVERFLOWUID=11, /* int: overflow UID */ - FS_OVERFLOWGID=12, /* int: overflow GID */ - FS_LEASES=13, /* int: leases enabled */ - FS_DIR_NOTIFY=14, /* int: directory notification enabled */ - FS_LEASE_TIME=15, /* int: maximum time to wait for a lease break */ - FS_DQSTATS=16, /* disc quota usage statistics and control */ - FS_XFS=17, /* struct: control xfs parameters */ - FS_AIO_NR=18, /* current system-wide number of aio requests */ - FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ - FS_INOTIFY=20, /* inotify submenu */ - FS_OCFS2=988, /* ocfs2 */ -}; - -/* /proc/sys/fs/quota/ */ -enum { - FS_DQ_LOOKUPS = 1, - FS_DQ_DROPS = 2, - FS_DQ_READS = 3, - FS_DQ_WRITES = 4, - FS_DQ_CACHE_HITS = 5, - FS_DQ_ALLOCATED = 6, - FS_DQ_FREE = 7, - FS_DQ_SYNCS = 8, - FS_DQ_WARNINGS = 9, -}; - -/* CTL_DEBUG names: */ - -/* CTL_DEV names: */ -enum { - DEV_CDROM=1, - DEV_HWMON=2, - DEV_PARPORT=3, - DEV_RAID=4, - DEV_MAC_HID=5, - DEV_SCSI=6, - DEV_IPMI=7, -}; - -/* /proc/sys/dev/cdrom */ -enum { - DEV_CDROM_INFO=1, - DEV_CDROM_AUTOCLOSE=2, - DEV_CDROM_AUTOEJECT=3, - DEV_CDROM_DEBUG=4, - DEV_CDROM_LOCK=5, - DEV_CDROM_CHECK_MEDIA=6 -}; - -/* /proc/sys/dev/parport */ -enum { - DEV_PARPORT_DEFAULT=-3 -}; - -/* /proc/sys/dev/raid */ -enum { - DEV_RAID_SPEED_LIMIT_MIN=1, - DEV_RAID_SPEED_LIMIT_MAX=2 -}; - -/* /proc/sys/dev/parport/default */ -enum { - 
DEV_PARPORT_DEFAULT_TIMESLICE=1, - DEV_PARPORT_DEFAULT_SPINTIME=2 -}; - -/* /proc/sys/dev/parport/parport n */ -enum { - DEV_PARPORT_SPINTIME=1, - DEV_PARPORT_BASE_ADDR=2, - DEV_PARPORT_IRQ=3, - DEV_PARPORT_DMA=4, - DEV_PARPORT_MODES=5, - DEV_PARPORT_DEVICES=6, - DEV_PARPORT_AUTOPROBE=16 -}; - -/* /proc/sys/dev/parport/parport n/devices/ */ -enum { - DEV_PARPORT_DEVICES_ACTIVE=-3, -}; - -/* /proc/sys/dev/parport/parport n/devices/device n */ -enum { - DEV_PARPORT_DEVICE_TIMESLICE=1, -}; - -/* /proc/sys/dev/mac_hid */ -enum { - DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES=1, - DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES=2, - DEV_MAC_HID_MOUSE_BUTTON_EMULATION=3, - DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE=4, - DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE=5, - DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES=6 -}; - -/* /proc/sys/dev/scsi */ -enum { - DEV_SCSI_LOGGING_LEVEL=1, -}; - -/* /proc/sys/dev/ipmi */ -enum { - DEV_IPMI_POWEROFF_POWERCYCLE=1, -}; - -/* /proc/sys/abi */ -enum -{ - ABI_DEFHANDLER_COFF=1, /* default handler for coff binaries */ - ABI_DEFHANDLER_ELF=2, /* default handler for ELF binaries */ - ABI_DEFHANDLER_LCALL7=3,/* default handler for procs using lcall7 */ - ABI_DEFHANDLER_LIBCSO=4,/* default handler for an libc.so ELF interp */ - ABI_TRACE=5, /* tracing flags */ - ABI_FAKE_UTSNAME=6, /* fake target utsname information */ -}; - -#ifdef __KERNEL__ -#include - -/* For the /proc/sys support */ -struct ctl_table; -struct nsproxy; -struct ctl_table_root; - -struct ctl_table_set { - struct list_head list; - struct ctl_table_set *parent; - int (*is_seen)(struct ctl_table_set *); -}; - -extern void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)); - -struct ctl_table_header; - -extern void sysctl_head_get(struct ctl_table_header *); -extern void sysctl_head_put(struct ctl_table_header *); -extern int sysctl_is_seen(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); -extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev); -extern void sysctl_head_finish(struct ctl_table_header *prev); -extern int sysctl_perm(struct ctl_table_root *root, - struct ctl_table *table, int op); - -typedef struct ctl_table ctl_table; - -typedef int ctl_handler (struct ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - -typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, - void __user *buffer, size_t *lenp, loff_t *ppos); - -extern int proc_dostring(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - struct file *, 
void __user *, size_t *, loff_t *); - -extern int do_sysctl (int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - -extern ctl_handler sysctl_data; -extern ctl_handler sysctl_string; -extern ctl_handler sysctl_intvec; -extern ctl_handler sysctl_jiffies; -extern ctl_handler sysctl_ms_jiffies; - - -/* - * Register a set of sysctl names by calling register_sysctl_table - * with an initialised array of struct ctl_table's. An entry with zero - * ctl_name and NULL procname terminates the table. table->de will be - * set up by the registration and need not be initialised in advance. - * - * sysctl names can be mirrored automatically under /proc/sys. The - * procname supplied controls /proc naming. - * - * The table's mode will be honoured both for sys_sysctl(2) and - * proc-fs access. - * - * Leaf nodes in the sysctl tree will be represented by a single file - * under /proc; non-leaf nodes will be represented by directories. A - * null procname disables /proc mirroring at this node. - * - * sysctl entries with a zero ctl_name will not be available through - * the binary sysctl interface. - * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * More sophisticated management can be enabled by the provision of a - * strategy routine with the table entry. This will be called before - * any automatic read or write of the data is performed. - * - * The strategy routine may return: - * <0: Error occurred (error is passed to user process) - * 0: OK - proceed with automatic read or write. - * >0: OK - read or write has been done by the strategy routine, so - * return immediately. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases. - */ - -/* A sysctl table is an array of struct ctl_table: */ -struct ctl_table -{ - int ctl_name; /* Binary ID */ - const char *procname; /* Text ID for /proc/sys, or zero */ - void *data; - int maxlen; - mode_t mode; - struct ctl_table *child; - struct ctl_table *parent; /* Automatically set */ - proc_handler *proc_handler; /* Callback for text formatting */ - ctl_handler *strategy; /* Callback function for all r/w */ - void *extra1; - void *extra2; -}; - -struct ctl_table_root { - struct list_head root_list; - struct ctl_table_set default_set; - struct ctl_table_set *(*lookup)(struct ctl_table_root *root, - struct nsproxy *namespaces); - int (*permissions)(struct ctl_table_root *root, - struct nsproxy *namespaces, struct ctl_table *table); -}; - -/* struct ctl_table_header is used to maintain dynamic lists of - struct ctl_table trees. 
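The registration comment above amounts to very little code in practice. A minimal in-kernel sketch follows; it is not part of the patch, the name pl_example and the resulting /proc/sys/pl_example path are illustrative, and the terminating entry with a zero ctl_name follows the rule stated in the comment.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/sysctl.h>

static int pl_example;                            /* value exposed read/write */

static struct ctl_table pl_table[] = {
        {
                .ctl_name     = CTL_UNNUMBERED,   /* no binary sysctl number */
                .procname     = "pl_example",     /* /proc/sys/pl_example */
                .data         = &pl_example,
                .maxlen       = sizeof(int),
                .mode         = 0644,
                .proc_handler = &proc_dointvec,   /* default text handler */
        },
        { .ctl_name = 0 }                         /* zero entry terminates the table */
};

static struct ctl_table_header *pl_header;

static int __init pl_init(void)
{
        pl_header = register_sysctl_table(pl_table);
        return pl_header ? 0 : -ENOMEM;
}

static void __exit pl_exit(void)
{
        unregister_sysctl_table(pl_header);
}

module_init(pl_init);
module_exit(pl_exit);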
*/ -struct ctl_table_header -{ - struct ctl_table *ctl_table; - struct list_head ctl_entry; - int used; - int count; - struct completion *unregistering; - struct ctl_table *ctl_table_arg; - struct ctl_table_root *root; - struct ctl_table_set *set; - struct ctl_table *attached_by; - struct ctl_table *attached_to; - struct ctl_table_header *parent; -}; - -/* struct ctl_path describes where in the hierarchy a table is added */ -struct ctl_path { - const char *procname; - int ctl_name; -}; - -void register_sysctl_root(struct ctl_table_root *root); -struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, struct nsproxy *namespaces, - const struct ctl_path *path, struct ctl_table *table); -struct ctl_table_header *register_sysctl_table(struct ctl_table * table); -struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, - struct ctl_table *table); - -void unregister_sysctl_table(struct ctl_table_header * table); -int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_SYSCTL_H */ diff -Nurb linux-2.6.27-720/include/linux/textsearch.h linux-2.6.27-710/include/linux/textsearch.h --- linux-2.6.27-720/include/linux/textsearch.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/textsearch.h 2008-10-09 18:13:53.000000000 -0400 @@ -162,9 +162,9 @@ { struct ts_config *conf; - conf = (struct ts_config *) kzalloc(TS_PRIV_ALIGN(sizeof(*conf)) + payload, gfp_mask); + conf = kzalloc(TS_PRIV_ALIGN(sizeof(*conf)) + payload, gfp_mask); if (conf == NULL) - return (struct ts_config *) ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); return conf; } diff -Nurb linux-2.6.27-720/include/linux/types.h linux-2.6.27-710/include/linux/types.h --- linux-2.6.27-720/include/linux/types.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/types.h 2009-05-04 12:15:13.000000000 -0400 @@ -30,9 +30,7 @@ typedef __kernel_mqd_t mqd_t; #ifdef __KERNEL__ -#ifndef __cplusplus typedef _Bool bool; -#endif typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; @@ -211,12 +209,4 @@ #endif /* __KERNEL__ */ -/* - * Click: Macros for defining empty structures. Needed because GCC's C and C++ - * compilers have different ABIs for empty structures. 
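The Click comment above is the whole story behind the EMPTY_STRUCT_DECL/EMPTY_STRUCT_INIT macros removed in this hunk: a truly empty struct has size 0 under GNU C but size 1 under C++, so any layout shared between objects built by the two compilers drifts, and the dummy member pins both sides to the same size. A small sketch of what the macros expand to, using unwind_frame_info from a later hunk as the example name:

#define EMPTY_STRUCT_DECL(s) struct s { int gcc_is_buggy; }
#define EMPTY_STRUCT_INIT(s) (s) { 0 }

EMPTY_STRUCT_DECL(unwind_frame_info);
/* expands to: struct unwind_frame_info { int gcc_is_buggy; }; */

static void example(void)
{
        /* expands to the compound literal (struct unwind_frame_info) { 0 } */
        struct unwind_frame_info info = EMPTY_STRUCT_INIT(struct unwind_frame_info);
        (void)info;
}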
- */ - -#define EMPTY_STRUCT_DECL(s) struct s { int gcc_is_buggy; } -#define EMPTY_STRUCT_INIT(s) (s) { 0 } - #endif /* _LINUX_TYPES_H */ diff -Nurb linux-2.6.27-720/include/linux/unwind.h linux-2.6.27-710/include/linux/unwind.h --- linux-2.6.27-720/include/linux/unwind.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/unwind.h 2008-10-09 18:13:53.000000000 -0400 @@ -14,7 +14,7 @@ struct module; -EMPTY_STRUCT_DECL(unwind_frame_info); +struct unwind_frame_info {}; static inline void unwind_init(void) {} static inline void unwind_setup(void) {} diff -Nurb linux-2.6.27-720/include/linux/wait.h linux-2.6.27-710/include/linux/wait.h --- linux-2.6.27-720/include/linux/wait.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/linux/wait.h 2008-10-09 18:13:53.000000000 -0400 @@ -485,7 +485,7 @@ static inline int wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) { - if (!test_bit(bit, (volatile unsigned long *) word)) + if (!test_bit(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } @@ -509,7 +509,7 @@ static inline int wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode) { - if (!test_and_set_bit(bit, (volatile unsigned long *) word)) + if (!test_and_set_bit(bit, word)) return 0; return out_of_line_wait_on_bit_lock(word, bit, action, mode); } diff -Nurb linux-2.6.27-720/include/net/compat.h linux-2.6.27-710/include/net/compat.h --- linux-2.6.27-720/include/net/compat.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/net/compat.h 2008-10-09 18:13:53.000000000 -0400 @@ -33,9 +33,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); -asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); -asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); -asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); +extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); +extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); +extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, int); diff -Nurb linux-2.6.27-720/include/net/neighbour.h linux-2.6.27-710/include/net/neighbour.h --- linux-2.6.27-720/include/net/neighbour.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/net/neighbour.h 2008-10-09 18:13:53.000000000 -0400 @@ -275,7 +275,7 @@ struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler_t *proc_handler, + proc_handler *proc_handler, ctl_handler *strategy); extern void neigh_sysctl_unregister(struct neigh_parms *p); diff -Nurb linux-2.6.27-720/include/net/netlink.h linux-2.6.27-710/include/net/netlink.h --- linux-2.6.27-720/include/net/netlink.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/net/netlink.h 2008-10-09 18:13:53.000000000 -0400 @@ -315,7 +315,7 @@ static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh, int hdrlen) { - unsigned char *data = (unsigned char *) nlmsg_data(nlh); + unsigned char *data = nlmsg_data(nlh); return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen)); } @@ -732,7 +732,7 @@ */ static inline struct nlattr *nla_find_nested(struct nlattr *nla, 
int attrtype) { - return nla_find((struct nlattr *) nla_data(nla), nla_len(nla), attrtype); + return nla_find(nla_data(nla), nla_len(nla), attrtype); } /** @@ -748,7 +748,7 @@ struct nlattr *nla, const struct nla_policy *policy) { - return nla_parse(tb, maxtype, (struct nlattr *) nla_data(nla), nla_len(nla), policy); + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); } /** @@ -775,7 +775,7 @@ if (nested_len < 0) return -EINVAL; if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, (struct nlattr *) nla_data(nla) + NLA_ALIGN(len), + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), nested_len, policy); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; @@ -1069,7 +1069,7 @@ */ static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) { - struct nlattr *nest = (struct nlattr *) (char *)start + NLMSG_ALIGN(start->nla_len); + struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len); start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; return nla_nest_end(skb, nest); @@ -1103,7 +1103,7 @@ static inline int nla_validate_nested(struct nlattr *start, int maxtype, const struct nla_policy *policy) { - return nla_validate((struct nlattr *) nla_data(start), nla_len(start), maxtype, policy); + return nla_validate(nla_data(start), nla_len(start), maxtype, policy); } /** diff -Nurb linux-2.6.27-720/include/net/pkt_cls.h linux-2.6.27-710/include/net/pkt_cls.h --- linux-2.6.27-720/include/net/pkt_cls.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/net/pkt_cls.h 2008-10-09 18:13:53.000000000 -0400 @@ -302,7 +302,9 @@ #else /* CONFIG_NET_EMATCH */ -EMPTY_STRUCT_DECL(tcf_ematch_tree); +struct tcf_ematch_tree +{ +}; #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0) #define tcf_em_tree_destroy(tp, t) do { (void)(t); } while(0) diff -Nurb linux-2.6.27-720/include/net/request_sock.h linux-2.6.27-710/include/net/request_sock.h --- linux-2.6.27-720/include/net/request_sock.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/net/request_sock.h 2008-10-09 18:13:53.000000000 -0400 @@ -60,7 +60,7 @@ static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops) { - struct request_sock *req = (struct request_sock *) kmem_cache_alloc(ops->slab, GFP_ATOMIC); + struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC); if (req != NULL) req->rsk_ops = ops; diff -Nurb linux-2.6.27-720/include/net/route.h linux-2.6.27-710/include/net/route.h --- linux-2.6.27-720/include/net/route.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/net/route.h 2009-05-04 12:15:13.000000000 -0400 @@ -161,16 +161,6 @@ __be16 sport, __be16 dport, struct sock *sk, int flags) { -#ifdef __cplusplus - struct flowi fl; - fl.oif = oif; - fl.nl_u.ip4_u.daddr = dst; - fl.nl_u.ip4_u.saddr = src; - fl.nl_u.ip4_u.tos = tos; - fl.proto = protocol; - fl.uli_u.ports.sport = sport; - fl.uli_u.ports.dport = dport; -#else struct flowi fl = { .oif = oif, .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = dst, @@ -180,7 +170,6 @@ .uli_u = { .ports = { .sport = sport, .dport = dport } } }; -#endif int err; struct net *net = sock_net(sk); diff -Nurb linux-2.6.27-720/include/net/sock.h linux-2.6.27-710/include/net/sock.h --- linux-2.6.27-720/include/net/sock.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/net/sock.h 2009-05-04 12:15:13.000000000 -0400 @@ -1104,13 +1104,13 @@ { if (skb->ip_summed == CHECKSUM_NONE) { int err = 0; - __wsum csum = 
csum_and_copy_from_user((unsigned char *) from, - (unsigned char *) page_address(page) + off, + __wsum csum = csum_and_copy_from_user(from, + page_address(page) + off, copy, 0, &err); if (err) return err; skb->csum = csum_block_add(skb->csum, csum, skb->len); - } else if (copy_from_user((char *) page_address(page) + off, from, copy)) + } else if (copy_from_user(page_address(page) + off, from, copy)) return -EFAULT; skb->len += copy; diff -Nurb linux-2.6.27-720/include/rdma/ib_user_verbs.h linux-2.6.27-710/include/rdma/ib_user_verbs.h --- linux-2.6.27-720/include/rdma/ib_user_verbs.h 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/include/rdma/ib_user_verbs.h 2008-10-09 18:13:53.000000000 -0400 @@ -504,7 +504,8 @@ __u64 driver_data[0]; }; -EMPTY_STRUCT_DECL(ib_uverbs_modify_qp_resp); +struct ib_uverbs_modify_qp_resp { +}; struct ib_uverbs_destroy_qp { __u64 response; diff -Nurb linux-2.6.27-720/kernel/sched.c linux-2.6.27-710/kernel/sched.c --- linux-2.6.27-720/kernel/sched.c 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/kernel/sched.c 2009-05-04 12:15:14.000000000 -0400 @@ -6298,7 +6298,7 @@ static void set_table_entry(struct ctl_table *entry, const char *procname, void *data, int maxlen, - mode_t mode, proc_handler_t *proc_handler) + mode_t mode, proc_handler *proc_handler) { entry->procname = procname; entry->data = data; diff -Nurb linux-2.6.27-720/net/core/dev.c linux-2.6.27-710/net/core/dev.c --- linux-2.6.27-720/net/core/dev.c 2009-05-04 12:19:35.000000000 -0400 +++ linux-2.6.27-710/net/core/dev.c 2009-05-04 12:16:04.000000000 -0400 @@ -252,9 +252,6 @@ write_unlock_bh(&dev_base_lock); } -/* Click: input packet handlers, might steal packets from net_rx_action. */ -static RAW_NOTIFIER_HEAD(net_in_chain); - /* * Our notifier list */ @@ -2024,31 +2021,6 @@ return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } - -/* - * Click: Allow Click to ask to intercept input packets. 
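The register_net_in()/unregister_net_in() pair whose bodies appear just below lets a module splice itself into netif_receive_skb() and steal packets before the normal protocol handlers run. The following client-side sketch is not part of the patch and only compiles against the Click-patched tree; the handler name and its accept-all-IP policy are purely illustrative.

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

extern int register_net_in(struct notifier_block *nb);      /* Click-only exports */
extern int unregister_net_in(struct notifier_block *nb);

static int my_net_in(struct notifier_block *nb, unsigned long val, void *data)
{
        struct sk_buff *skb = data;

        if (skb->protocol == htons(ETH_P_IP)) {
                /* Take ownership: NOTIFY_STOP makes netif_receive_skb()
                 * return without handing the skb to the protocol layers,
                 * so it must be consumed (queued or freed) here. */
                kfree_skb(skb);
                return NOTIFY_STOP;
        }
        return NOTIFY_DONE;                       /* let the stack keep it */
}

static struct notifier_block my_net_in_nb = { .notifier_call = my_net_in };

/* register_net_in(&my_net_in_nb) at module init,
 * unregister_net_in(&my_net_in_nb) at module exit. */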
- */ -int -register_net_in(struct notifier_block *nb) -{ - int err; - rtnl_lock(); - err = raw_notifier_chain_register(&net_in_chain, nb); - rtnl_unlock(); - return err; -} - -int -unregister_net_in(struct notifier_block *nb) -{ - int err; - rtnl_lock(); - err = raw_notifier_chain_unregister(&net_in_chain, nb); - rtnl_unlock(); - return err; -} - - #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) /* These hooks defined here for ATM */ struct net_bridge; @@ -2219,14 +2191,14 @@ * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int __netif_receive_skb(struct sk_buff *skb) +int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; struct net_device *null_or_orig; int ret = NET_RX_DROP; int *cur_elevator = &__get_cpu_var(sknid_elevator); - //__be16 type; + __be16 type; *cur_elevator = 0; @@ -2255,14 +2227,6 @@ skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; - /* Click: may want to steal the packet */ - if (notifier_data >= 0 - && raw_notifier_call_chain(&net_in_chain, - notifier_data, - skb) & NOTIFY_STOP_MASK) { - return ret; - } - pt_prev = NULL; rcu_read_lock(); @@ -2381,8 +2345,7 @@ } local_irq_enable(); - __netif_receive_skb(skb, skb->protocol, skb_queue_len(&queue->input_pkt_queue)); -//XXX netif_receive_skb(skb); + netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); return work; @@ -4935,7 +4898,6 @@ EXPORT_SYMBOL(dev_get_by_index); EXPORT_SYMBOL(dev_get_by_name); EXPORT_SYMBOL(dev_open); -EXPORT_SYMBOL(dev_ioctl); EXPORT_SYMBOL(dev_queue_xmit); EXPORT_SYMBOL(dev_remove_pack); EXPORT_SYMBOL(dev_set_allmulti); @@ -4948,16 +4910,10 @@ EXPORT_SYMBOL(netdev_set_master); EXPORT_SYMBOL(netdev_state_change); EXPORT_SYMBOL(netif_receive_skb); -EXPORT_SYMBOL(__netif_receive_skb); EXPORT_SYMBOL(netif_rx); EXPORT_SYMBOL(register_gifconf); EXPORT_SYMBOL(register_netdevice); EXPORT_SYMBOL(register_netdevice_notifier); - -/* Click */ -EXPORT_SYMBOL(register_net_in); -EXPORT_SYMBOL(unregister_net_in); - EXPORT_SYMBOL(skb_checksum_help); EXPORT_SYMBOL(synchronize_net); EXPORT_SYMBOL(unregister_netdevice); diff -Nurb linux-2.6.27-720/net/core/dev.c.orig linux-2.6.27-710/net/core/dev.c.orig --- linux-2.6.27-720/net/core/dev.c.orig 2009-05-04 12:16:04.000000000 -0400 +++ linux-2.6.27-710/net/core/dev.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,4936 +0,0 @@ -/* - * NET3 Protocol independent device support routines. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Derived from the non IP parts of dev.c 1.0.19 - * Authors: Ross Biro - * Fred N. van Kempen, - * Mark Evans, - * - * Additional Authors: - * Florian la Roche - * Alan Cox - * David Hinds - * Alexey Kuznetsov - * Adam Sulmicki - * Pekka Riikonen - * - * Changes: - * D.J. Barrow : Fixed bug where dev->refcnt gets set - * to 2 if register_netdev gets called - * before net_dev_init & also removed a - * few lines of code in the process. - * Alan Cox : device private ioctl copies fields back. - * Alan Cox : Transmit queue code does relevant - * stunts to keep the queue safe. - * Alan Cox : Fixed double lock. - * Alan Cox : Fixed promisc NULL pointer trap - * ???????? 
: Support the full private ioctl range - * Alan Cox : Moved ioctl permission check into - * drivers - * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI - * Alan Cox : 100 backlog just doesn't cut it when - * you start doing multicast video 8) - * Alan Cox : Rewrote net_bh and list manager. - * Alan Cox : Fix ETH_P_ALL echoback lengths. - * Alan Cox : Took out transmit every packet pass - * Saved a few bytes in the ioctl handler - * Alan Cox : Network driver sets packet type before - * calling netif_rx. Saves a function - * call a packet. - * Alan Cox : Hashed net_bh() - * Richard Kooijman: Timestamp fixes. - * Alan Cox : Wrong field in SIOCGIFDSTADDR - * Alan Cox : Device lock protection. - * Alan Cox : Fixed nasty side effect of device close - * changes. - * Rudi Cilibrasi : Pass the right thing to - * set_mac_address() - * Dave Miller : 32bit quantity for the device lock to - * make it work out on a Sparc. - * Bjorn Ekwall : Added KERNELD hack. - * Alan Cox : Cleaned up the backlog initialise. - * Craig Metz : SIOCGIFCONF fix if space for under - * 1 device. - * Thomas Bogendoerfer : Return ENODEV for dev_open, if there - * is no device open function. - * Andi Kleen : Fix error reporting for SIOCGIFCONF - * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF - * Cyrus Durgin : Cleaned for KMOD - * Adam Sulmicki : Bug Fix : Network Device Unload - * A network device unload needs to purge - * the backlog queue. - * Paul Rusty Russell : SIOCSIFNAME - * Pekka Riikonen : Netdev boot-time settings code - * Andrew Morton : Make unregister_netdevice wait - * indefinitely on dev->refcnt - * J Hadi Salim : - Backlog queue sampling - * - netif_rx() feedback - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "net-sysfs.h" - -/* - * The list of packet types we will receive (as opposed to discard) - * and the routines to invoke. - * - * Why 16. Because with 16 the only overlap we get on a hash of the - * low nibble of the protocol value is RARP/SNAP/X.25. - * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. 
- * --BLG - * - * 0800 IP - * 8100 802.1Q VLAN - * 0001 802.3 - * 0002 AX.25 - * 0004 802.2 - * 8035 RARP - * 0005 SNAP - * 0805 X.25 - * 0806 ARP - * 8137 IPX - * 0009 Localtalk - * 86DD IPv6 - */ - -#define PTYPE_HASH_SIZE (16) -#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) - -static DEFINE_SPINLOCK(ptype_lock); -static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -static struct list_head ptype_all __read_mostly; /* Taps */ - -#ifdef CONFIG_NET_DMA -struct net_dma { - struct dma_client client; - spinlock_t lock; - cpumask_t channel_mask; - struct dma_chan **channels; -}; - -static enum dma_state_client -netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_state state); - -static struct net_dma net_dma = { - .client = { - .event_callback = netdev_dma_event, - }, -}; -#endif - -/* - * The @dev_base_head list is protected by @dev_base_lock and the rtnl - * semaphore. - * - * Pure readers hold dev_base_lock for reading. - * - * Writers must hold the rtnl semaphore while they loop through the - * dev_base_head list, and hold dev_base_lock for writing when they do the - * actual updates. This allows pure readers to access the list even - * while a writer is preparing to update it. - * - * To put it another way, dev_base_lock is held for writing only to - * protect against pure readers; the rtnl semaphore provides the - * protection against other writers. - * - * See, for example usages, register_netdevice() and - * unregister_netdevice(), which must be called with the rtnl - * semaphore held. - */ -DEFINE_RWLOCK(dev_base_lock); - -EXPORT_SYMBOL(dev_base_lock); - -#define NETDEV_HASHBITS 8 -#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) - -static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) -{ - unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; -} - -static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) -{ - return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; -} - -/* Device list insertion */ -static int list_netdevice(struct net_device *dev) -{ - struct net *net = dev_net(dev); - - ASSERT_RTNL(); - - write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &net->dev_base_head); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); - write_unlock_bh(&dev_base_lock); - return 0; -} - -/* Device list removal */ -static void unlist_netdevice(struct net_device *dev) -{ - ASSERT_RTNL(); - - /* Unlink dev from the device chain */ - write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); -} - -/* - * Our notifier list - */ - -static RAW_NOTIFIER_HEAD(netdev_chain); - -/* - * Device drivers call our routines to queue packets here. We empty the - * queue in the local softnet handler. 
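The dev_base_lock discipline spelled out above (pure readers take the rwlock for reading; writers additionally hold the RTNL semaphore) looks like the following from the reader side. This is a sketch only, not part of the patch, and the IFF_UP predicate is illustrative.

#include <linux/netdevice.h>
#include <linux/list.h>

static struct net_device *first_running_dev(struct net *net)
{
        struct net_device *dev, *found = NULL;

        read_lock(&dev_base_lock);                 /* pure reader */
        list_for_each_entry(dev, &net->dev_base_head, dev_list) {
                if (dev->flags & IFF_UP) {
                        dev_hold(dev);             /* caller must dev_put() */
                        found = dev;
                        break;
                }
        }
        read_unlock(&dev_base_lock);
        return found;
}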
- */ - -DEFINE_PER_CPU(struct softnet_data, softnet_data); - -#ifdef CONFIG_LOCKDEP -/* - * register_netdevice() inits txq->_xmit_lock and sets lockdep class - * according to dev->type - */ -static const unsigned short netdev_lock_type[] = - {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, - ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, - ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, - ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, - ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, - ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, - ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, - ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, - ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, - ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, - ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, - ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, - ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, - ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, - ARPHRD_NONE}; - -static const char *netdev_lock_name[] = - {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", - "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", - "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", - "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", - "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", - "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", - "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", - "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", - "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", - "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", - "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", - "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", - "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", - "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", - "_xmit_NONE"}; - -static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; -static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; - -static inline unsigned short netdev_lock_pos(unsigned short dev_type) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) - if (netdev_lock_type[i] == dev_type) - return i; - /* the last key is used by default */ - return ARRAY_SIZE(netdev_lock_type) - 1; -} - -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ - int i; - - i = netdev_lock_pos(dev_type); - lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], - netdev_lock_name[i]); -} - -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ - int i; - - i = netdev_lock_pos(dev->type); - lockdep_set_class_and_name(&dev->addr_list_lock, - &netdev_addr_lock_key[i], - netdev_lock_name[i]); -} -#else -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ -} -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ -} -#endif - -/******************************************************************************* - - Protocol management and registration routines - -*******************************************************************************/ - -/* - * Add a protocol ID to the list. Now that the input handler is - * smarter we can dispense with all the messy stuff that used to be - * here. - * - * BEWARE!!! 
Protocol handlers, mangling input packets, - * MUST BE last in hash buckets and checking protocol handlers - * MUST start from promiscuous ptype_all chain in net_bh. - * It is true now, do not change it. - * Explanation follows: if protocol handler, mangling packet, will - * be the first on list, it is not able to sense, that packet - * is cloned and should be copied-on-write, so that it will - * change it and subsequent readers will get broken packet. - * --ANK (980803) - */ - -/** - * dev_add_pack - add packet handler - * @pt: packet type declaration - * - * Add a protocol handler to the networking stack. The passed &packet_type - * is linked into kernel lists and may not be freed until it has been - * removed from the kernel lists. - * - * This call does not sleep therefore it can not - * guarantee all CPU's that are in middle of receiving packets - * will see the new packet type (until the next received packet). - */ - -void dev_add_pack(struct packet_type *pt) -{ - int hash; - - spin_lock_bh(&ptype_lock); - if (pt->type == htons(ETH_P_ALL)) - list_add_rcu(&pt->list, &ptype_all); - else { - hash = ntohs(pt->type) & PTYPE_HASH_MASK; - list_add_rcu(&pt->list, &ptype_base[hash]); - } - spin_unlock_bh(&ptype_lock); -} - -/** - * __dev_remove_pack - remove packet handler - * @pt: packet type declaration - * - * Remove a protocol handler that was previously added to the kernel - * protocol handlers by dev_add_pack(). The passed &packet_type is removed - * from the kernel lists and can be freed or reused once this function - * returns. - * - * The packet type might still be in use by receivers - * and must not be freed until after all the CPU's have gone - * through a quiescent state. - */ -void __dev_remove_pack(struct packet_type *pt) -{ - struct list_head *head; - struct packet_type *pt1; - - spin_lock_bh(&ptype_lock); - - if (pt->type == htons(ETH_P_ALL)) - head = &ptype_all; - else - head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; - - list_for_each_entry(pt1, head, list) { - if (pt == pt1) { - list_del_rcu(&pt->list); - goto out; - } - } - - printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); -out: - spin_unlock_bh(&ptype_lock); -} -/** - * dev_remove_pack - remove packet handler - * @pt: packet type declaration - * - * Remove a protocol handler that was previously added to the kernel - * protocol handlers by dev_add_pack(). The passed &packet_type is removed - * from the kernel lists and can be freed or reused once this function - * returns. - * - * This call sleeps to guarantee that no CPU is looking at the packet - * type after return. - */ -void dev_remove_pack(struct packet_type *pt) -{ - __dev_remove_pack(pt); - - synchronize_net(); -} - -/****************************************************************************** - - Device Boot-time Settings Routines - -*******************************************************************************/ - -/* Boot time configuration table */ -static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; - -/** - * netdev_boot_setup_add - add new setup entry - * @name: name of the device - * @map: configured settings for the device - * - * Adds new setup entry to the dev_boot_setup list. The function - * returns 0 on error and 1 on success. This is a generic routine to - * all netdevices. 
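dev_add_pack() and dev_remove_pack(), documented above, are the registration half of this machinery. A minimal protocol tap might look like the sketch below; it is not part of the patch, and the 0x88b5 ethertype (a local/experimental value) and the drop-everything handler body are purely illustrative.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
                   struct packet_type *pt, struct net_device *orig_dev)
{
        /* The handler owns this reference to the skb (it may be a shared
         * clone, per the BEWARE comment above), so consume it here. */
        kfree_skb(skb);
        return NET_RX_SUCCESS;
}

static struct packet_type tap_ptype = {
        .type = __constant_htons(0x88b5),    /* local/experimental ethertype */
        .func = tap_rcv,
};

/* dev_add_pack(&tap_ptype) to start receiving;
 * dev_remove_pack(&tap_ptype) to stop (it synchronizes with readers). */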
- */ -static int netdev_boot_setup_add(char *name, struct ifmap *map) -{ - struct netdev_boot_setup *s; - int i; - - s = dev_boot_setup; - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { - memset(s[i].name, 0, sizeof(s[i].name)); - strlcpy(s[i].name, name, IFNAMSIZ); - memcpy(&s[i].map, map, sizeof(s[i].map)); - break; - } - } - - return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; -} - -/** - * netdev_boot_setup_check - check boot time settings - * @dev: the netdevice - * - * Check boot time settings for the device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found, 1 if they are. - */ -int netdev_boot_setup_check(struct net_device *dev) -{ - struct netdev_boot_setup *s = dev_boot_setup; - int i; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strcmp(dev->name, s[i].name)) { - dev->irq = s[i].map.irq; - dev->base_addr = s[i].map.base_addr; - dev->mem_start = s[i].map.mem_start; - dev->mem_end = s[i].map.mem_end; - return 1; - } - } - return 0; -} - - -/** - * netdev_boot_base - get address from boot time settings - * @prefix: prefix for network device - * @unit: id for network device - * - * Check boot time settings for the base address of device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found. - */ -unsigned long netdev_boot_base(const char *prefix, int unit) -{ - const struct netdev_boot_setup *s = dev_boot_setup; - char name[IFNAMSIZ]; - int i; - - sprintf(name, "%s%d", prefix, unit); - - /* - * If device already registered then return base of 1 - * to indicate not to probe for this interface - */ - if (__dev_get_by_name(&init_net, name)) - return 1; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) - if (!strcmp(name, s[i].name)) - return s[i].map.base_addr; - return 0; -} - -/* - * Saves at boot time configured settings for any netdevice. - */ -int __init netdev_boot_setup(char *str) -{ - int ints[5]; - struct ifmap map; - - str = get_options(str, ARRAY_SIZE(ints), ints); - if (!str || !*str) - return 0; - - /* Save settings */ - memset(&map, 0, sizeof(map)); - if (ints[0] > 0) - map.irq = ints[1]; - if (ints[0] > 1) - map.base_addr = ints[2]; - if (ints[0] > 2) - map.mem_start = ints[3]; - if (ints[0] > 3) - map.mem_end = ints[4]; - - /* Add new entry to the list */ - return netdev_boot_setup_add(str, &map); -} - -__setup("netdev=", netdev_boot_setup); - -/******************************************************************************* - - Device Interface Subroutines - -*******************************************************************************/ - -/** - * __dev_get_by_name - find a device by its name - * @net: the applicable net namespace - * @name: name to find - * - * Find an interface by name. Must be called under RTNL semaphore - * or @dev_base_lock. If the name is found a pointer to the device - * is returned. If the name is not found then %NULL is returned. The - * reference counters are not incremented so the caller must be - * careful with locks. 
- */ - -struct net_device *__dev_get_by_name(struct net *net, const char *name) -{ - struct hlist_node *p; - - hlist_for_each(p, dev_name_hash(net, name)) { - struct net_device *dev - = hlist_entry(p, struct net_device, name_hlist); - if (!strncmp(dev->name, name, IFNAMSIZ)) - return dev; - } - return NULL; -} - -/** - * dev_get_by_name - find a device by its name - * @net: the applicable net namespace - * @name: name to find - * - * Find an interface by name. This can be called from any - * context and does its own locking. The returned handle has - * the usage count incremented and the caller must use dev_put() to - * release it when it is no longer needed. %NULL is returned if no - * matching device is found. - */ - -struct net_device *dev_get_by_name(struct net *net, const char *name) -{ - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); - if (dev) - dev_hold(dev); - read_unlock(&dev_base_lock); - return dev; -} - -/** - * __dev_get_by_index - find a device by its ifindex - * @net: the applicable net namespace - * @ifindex: index of device - * - * Search for an interface by index. Returns %NULL if the device - * is not found or a pointer to the device. The device has not - * had its reference counter increased so the caller must be careful - * about locking. The caller must hold either the RTNL semaphore - * or @dev_base_lock. - */ - -struct net_device *__dev_get_by_index(struct net *net, int ifindex) -{ - struct hlist_node *p; - - hlist_for_each(p, dev_index_hash(net, ifindex)) { - struct net_device *dev - = hlist_entry(p, struct net_device, index_hlist); - if (dev->ifindex == ifindex) - return dev; - } - return NULL; -} - - -/** - * dev_get_by_index - find a device by its ifindex - * @net: the applicable net namespace - * @ifindex: index of device - * - * Search for an interface by index. Returns NULL if the device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. - */ - -struct net_device *dev_get_by_index(struct net *net, int ifindex) -{ - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifindex); - if (dev) - dev_hold(dev); - read_unlock(&dev_base_lock); - return dev; -} - -/** - * dev_getbyhwaddr - find a device by its hardware address - * @net: the applicable net namespace - * @type: media type of device - * @ha: hardware address - * - * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold the - * rtnl semaphore. 
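The lookup helpers above come in two flavours: __dev_get_by_name()/__dev_get_by_index() leave the reference count alone and rely on the caller holding the RTNL or dev_base_lock, while dev_get_by_name()/dev_get_by_index() take dev_base_lock themselves and return a held reference. A minimal sketch of the reference-counted form follows; the interface name "eth0" is only an example.

struct net_device *dev;

dev = dev_get_by_name(&init_net, "eth0");	/* bumps the refcount on success */
if (dev) {
	printk(KERN_INFO "eth0 is ifindex %d\n", dev->ifindex);
	dev_put(dev);				/* drop the reference taken above */
}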
The returned device has not had its ref count increased - * and the caller must therefore be careful about locking - * - * BUGS: - * If the API was consistent this would be __dev_get_by_hwaddr - */ - -struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) -{ - struct net_device *dev; - - ASSERT_RTNL(); - - for_each_netdev(net, dev) - if (dev->type == type && - !memcmp(dev->dev_addr, ha, dev->addr_len)) - return dev; - - return NULL; -} - -EXPORT_SYMBOL(dev_getbyhwaddr); - -struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) -{ - struct net_device *dev; - - ASSERT_RTNL(); - for_each_netdev(net, dev) - if (dev->type == type) - return dev; - - return NULL; -} - -EXPORT_SYMBOL(__dev_getfirstbyhwtype); - -struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) -{ - struct net_device *dev; - - rtnl_lock(); - dev = __dev_getfirstbyhwtype(net, type); - if (dev) - dev_hold(dev); - rtnl_unlock(); - return dev; -} - -EXPORT_SYMBOL(dev_getfirstbyhwtype); - -/** - * dev_get_by_flags - find any device with given flags - * @net: the applicable net namespace - * @if_flags: IFF_* values - * @mask: bitmask of bits in if_flags to check - * - * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. - */ - -struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) -{ - struct net_device *dev, *ret; - - ret = NULL; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - if (((dev->flags ^ if_flags) & mask) == 0) { - dev_hold(dev); - ret = dev; - break; - } - } - read_unlock(&dev_base_lock); - return ret; -} - -/** - * dev_valid_name - check if name is okay for network device - * @name: name string - * - * Network device names need to be valid file names to - * to allow sysfs to work. We also disallow any kind of - * whitespace. - */ -int dev_valid_name(const char *name) -{ - if (*name == '\0') - return 0; - if (strlen(name) >= IFNAMSIZ) - return 0; - if (!strcmp(name, ".") || !strcmp(name, "..")) - return 0; - - while (*name) { - if (*name == '/' || isspace(*name)) - return 0; - name++; - } - return 1; -} - -/** - * __dev_alloc_name - allocate a name for a device - * @net: network namespace to allocate the device name in - * @name: name format string - * @buf: scratch buffer and result name string - * - * Passed a format string - eg "lt%d" it will try and find a suitable - * id. It scans list of devices to build up a free map, then chooses - * the first empty slot. The caller must hold the dev_base or rtnl lock - * while allocating the name and adding the device in order to avoid - * duplicates. - * Limited to bits_per_byte * page size devices (ie 32K on most platforms). - * Returns the number of the unit assigned or a negative errno code. - */ - -static int __dev_alloc_name(struct net *net, const char *name, char *buf) -{ - int i = 0; - const char *p; - const int max_netdevices = 8*PAGE_SIZE; - unsigned long *inuse; - struct net_device *d; - - p = strnchr(name, IFNAMSIZ-1, '%'); - if (p) { - /* - * Verify the string as this thing may have come from - * the user. There must be either one "%d" and no other "%" - * characters. 
- */ - if (p[1] != 'd' || strchr(p + 2, '%')) - return -EINVAL; - - /* Use one page as a bit array of possible slots */ - inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); - if (!inuse) - return -ENOMEM; - - for_each_netdev(net, d) { - if (!sscanf(d->name, name, &i)) - continue; - if (i < 0 || i >= max_netdevices) - continue; - - /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, IFNAMSIZ, name, i); - if (!strncmp(buf, d->name, IFNAMSIZ)) - set_bit(i, inuse); - } - - i = find_first_zero_bit(inuse, max_netdevices); - free_page((unsigned long) inuse); - } - - snprintf(buf, IFNAMSIZ, name, i); - if (!__dev_get_by_name(net, buf)) - return i; - - /* It is possible to run out of possible slots - * when the name is long and there isn't enough space left - * for the digits, or if all bits are used. - */ - return -ENFILE; -} - -/** - * dev_alloc_name - allocate a name for a device - * @dev: device - * @name: name format string - * - * Passed a format string - eg "lt%d" it will try and find a suitable - * id. It scans list of devices to build up a free map, then chooses - * the first empty slot. The caller must hold the dev_base or rtnl lock - * while allocating the name and adding the device in order to avoid - * duplicates. - * Limited to bits_per_byte * page size devices (ie 32K on most platforms). - * Returns the number of the unit assigned or a negative errno code. - */ - -int dev_alloc_name(struct net_device *dev, const char *name) -{ - char buf[IFNAMSIZ]; - struct net *net; - int ret; - - BUG_ON(!dev_net(dev)); - net = dev_net(dev); - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strlcpy(dev->name, buf, IFNAMSIZ); - return ret; -} - - -/** - * dev_change_name - change name of a device - * @dev: device - * @newname: name (or format string) must be at least IFNAMSIZ - * - * Change name of a device, can pass format strings "eth%d". - * for wildcarding. - */ -int dev_change_name(struct net_device *dev, char *newname) -{ - char oldname[IFNAMSIZ]; - int err = 0; - int ret; - struct net *net; - - ASSERT_RTNL(); - BUG_ON(!dev_net(dev)); - - net = dev_net(dev); - if (dev->flags & IFF_UP) - return -EBUSY; - - if (!dev_valid_name(newname)) - return -EINVAL; - - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) - return 0; - - memcpy(oldname, dev->name, IFNAMSIZ); - - if (strchr(newname, '%')) { - err = dev_alloc_name(dev, newname); - if (err < 0) - return err; - strcpy(newname, dev->name); - } - else if (__dev_get_by_name(net, newname)) - return -EEXIST; - else - strlcpy(dev->name, newname, IFNAMSIZ); - -rollback: - /* For now only devices in the initial network namespace - * are in sysfs. - */ - if (net == &init_net) { - ret = device_rename(&dev->dev, dev->name); - if (ret) { - memcpy(dev->name, oldname, IFNAMSIZ); - return ret; - } - } - - write_lock_bh(&dev_base_lock); - hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - write_unlock_bh(&dev_base_lock); - - ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); - ret = notifier_to_errno(ret); - - if (ret) { - if (err) { - printk(KERN_ERR - "%s: name change rollback failed: %d.\n", - dev->name, ret); - } else { - err = ret; - memcpy(dev->name, oldname, IFNAMSIZ); - goto rollback; - } - } - - return err; -} - -/** - * netdev_features_change - device changes features - * @dev: device to cause notification - * - * Called to indicate a device has changed features. 
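dev_alloc_name() above accepts a format string with at most one "%d", scans the namespace for the first free unit and writes the result into dev->name; dev_change_name() builds on it for renames. A short sketch of the allocation path, with the "veth%d" format chosen purely for illustration:

int unit;

unit = dev_alloc_name(dev, "veth%d");	/* e.g. picks veth0, veth1, ... */
if (unit < 0)
	return unit;			/* -EINVAL, -ENOMEM or -ENFILE as described above */
/* dev->name now holds the allocated name, e.g. "veth0" */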
- */ -void netdev_features_change(struct net_device *dev) -{ - call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); -} -EXPORT_SYMBOL(netdev_features_change); - -/** - * netdev_state_change - device changes state - * @dev: device to cause notification - * - * Called to indicate a device has changed state. This function calls - * the notifier chains for netdev_chain and sends a NEWLINK message - * to the routing socket. - */ -void netdev_state_change(struct net_device *dev) -{ - if (dev->flags & IFF_UP) { - call_netdevice_notifiers(NETDEV_CHANGE, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); - } -} - -void netdev_bonding_change(struct net_device *dev) -{ - call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); -} -EXPORT_SYMBOL(netdev_bonding_change); - -/** - * dev_load - load a network module - * @net: the applicable net namespace - * @name: name of interface - * - * If a network interface is not present and the process has suitable - * privileges this function loads the module. If module loading is not - * available in this kernel then it becomes a nop. - */ - -void dev_load(struct net *net, const char *name) -{ - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); - read_unlock(&dev_base_lock); - - if (!dev && capable(CAP_SYS_MODULE)) - request_module("%s", name); -} - -/** - * dev_open - prepare an interface for use. - * @dev: device to open - * - * Takes a device from down to up state. The device's private open - * function is invoked and then the multicast lists are loaded. Finally - * the device is moved into the up state and a %NETDEV_UP message is - * sent to the netdev notifier chain. - * - * Calling this function on an active interface is a nop. On a failure - * a negative errno code is returned. - */ -int dev_open(struct net_device *dev) -{ - int ret = 0; - - ASSERT_RTNL(); - - /* - * Is it already up? - */ - - if (dev->flags & IFF_UP) - return 0; - - /* - * Is it even present? - */ - if (!netif_device_present(dev)) - return -ENODEV; - - /* - * Call device private open method - */ - set_bit(__LINK_STATE_START, &dev->state); - - if (dev->validate_addr) - ret = dev->validate_addr(dev); - - if (!ret && dev->open) - ret = dev->open(dev); - - /* - * If it went open OK then: - */ - - if (ret) - clear_bit(__LINK_STATE_START, &dev->state); - else { - /* - * Set the flags. - */ - dev->flags |= IFF_UP; - - /* - * Initialize multicasting status - */ - dev_set_rx_mode(dev); - - /* - * Wakeup transmit queue engine - */ - dev_activate(dev); - - /* - * ... and announce new interface. - */ - call_netdevice_notifiers(NETDEV_UP, dev); - } - - return ret; -} - -/** - * dev_close - shutdown an interface. - * @dev: device to shutdown - * - * This function moves an active device into down state. A - * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - * chain. - */ -int dev_close(struct net_device *dev) -{ - ASSERT_RTNL(); - - might_sleep(); - - if (!(dev->flags & IFF_UP)) - return 0; - - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ - call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); - - clear_bit(__LINK_STATE_START, &dev->state); - - /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(). - * - * dev->stop() will invoke napi_disable() on all of it's - * napi_struct instances on this device. 
- */ - smp_mb__after_clear_bit(); /* Commit netif_running(). */ - - dev_deactivate(dev); - - /* - * Call the device specific close. This cannot fail. - * Only if device is UP - * - * We allow it to be called even after a DETACH hot-plug - * event. - */ - if (dev->stop) - dev->stop(dev); - - /* - * Device is now down. - */ - - dev->flags &= ~IFF_UP; - - /* - * Tell people we are down - */ - call_netdevice_notifiers(NETDEV_DOWN, dev); - - return 0; -} - - -/** - * dev_disable_lro - disable Large Receive Offload on a device - * @dev: device - * - * Disable Large Receive Offload (LRO) on a net device. Must be - * called under RTNL. This is needed if received packets may be - * forwarded to another interface. - */ -void dev_disable_lro(struct net_device *dev) -{ - if (dev->ethtool_ops && dev->ethtool_ops->get_flags && - dev->ethtool_ops->set_flags) { - u32 flags = dev->ethtool_ops->get_flags(dev); - if (flags & ETH_FLAG_LRO) { - flags &= ~ETH_FLAG_LRO; - dev->ethtool_ops->set_flags(dev, flags); - } - } - WARN_ON(dev->features & NETIF_F_LRO); -} -EXPORT_SYMBOL(dev_disable_lro); - - -static int dev_boot_phase = 1; - -/* - * Device change register/unregister. These are not inline or static - * as we export them to the world. - */ - -/** - * register_netdevice_notifier - register a network notifier block - * @nb: notifier - * - * Register a notifier to be called when network device events occur. - * The notifier passed is linked into the kernel structures and must - * not be reused until it has been unregistered. A negative errno code - * is returned on a failure. - * - * When registered all registration and up events are replayed - * to the new notifier to allow device to have a race free - * view of the network device list. - */ - -int register_netdevice_notifier(struct notifier_block *nb) -{ - struct net_device *dev; - struct net_device *last; - struct net *net; - int err; - - rtnl_lock(); - err = raw_notifier_chain_register(&netdev_chain, nb); - if (err) - goto unlock; - if (dev_boot_phase) - goto unlock; - for_each_net(net) { - for_each_netdev(net, dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); - err = notifier_to_errno(err); - if (err) - goto rollback; - - if (!(dev->flags & IFF_UP)) - continue; - - nb->notifier_call(nb, NETDEV_UP, dev); - } - } - -unlock: - rtnl_unlock(); - return err; - -rollback: - last = dev; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev == last) - break; - - if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); - } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - } - } - - raw_notifier_chain_unregister(&netdev_chain, nb); - goto unlock; -} - -/** - * unregister_netdevice_notifier - unregister a network notifier block - * @nb: notifier - * - * Unregister a notifier previously registered by - * register_netdevice_notifier(). The notifier is unlinked into the - * kernel structures and may then be reused. A negative errno code - * is returned on a failure. - */ - -int unregister_netdevice_notifier(struct notifier_block *nb) -{ - int err; - - rtnl_lock(); - err = raw_notifier_chain_unregister(&netdev_chain, nb); - rtnl_unlock(); - return err; -} - -/** - * call_netdevice_notifiers - call all network notifier blocks - * @val: value passed unmodified to notifier function - * @dev: net_device pointer passed unmodified to notifier function - * - * Call all network notifier blocks. Parameters and return value - * are as for raw_notifier_call_chain(). 
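register_netdevice_notifier() above replays NETDEV_REGISTER and NETDEV_UP for devices that already exist, so a subscriber gets a race-free view regardless of when it loads. A minimal subscriber might be sketched as follows; my_netdev_event and my_netdev_nb are assumed names.

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		printk(KERN_INFO "%s: now up\n", dev->name);
		break;
	case NETDEV_GOING_DOWN:
		/* device still operating: last chance to quiesce users */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_netdev_nb = {
	.notifier_call = my_netdev_event,
};

/* register_netdevice_notifier(&my_netdev_nb) at init,
 * unregister_netdevice_notifier(&my_netdev_nb) at exit */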
- */ - -int call_netdevice_notifiers(unsigned long val, struct net_device *dev) -{ - return raw_notifier_call_chain(&netdev_chain, val, dev); -} - -/* When > 0 there are consumers of rx skb time stamps */ -static atomic_t netstamp_needed = ATOMIC_INIT(0); - -void net_enable_timestamp(void) -{ - atomic_inc(&netstamp_needed); -} - -void net_disable_timestamp(void) -{ - atomic_dec(&netstamp_needed); -} - -static inline void net_timestamp(struct sk_buff *skb) -{ - if (atomic_read(&netstamp_needed)) - __net_timestamp(skb); - else - skb->tstamp.tv64 = 0; -} - -/* - * Support routine. Sends outgoing frames to any network - * taps currently in use. - */ - -static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) -{ - struct packet_type *ptype; - - net_timestamp(skb); - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { - /* Never send packets back to the socket - * they originated from - MvS (miquels@drinkel.ow.org) - */ - if ((ptype->dev == dev || !ptype->dev) && - (ptype->af_packet_priv == NULL || - (struct sock *)ptype->af_packet_priv != skb->sk)) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - break; - - /* skb->nh should be correctly - set by sender, so that the second statement is - just protection against buggy protocols. - */ - skb_reset_mac_header(skb2); - - if (skb_network_header(skb2) < skb2->data || - skb2->network_header > skb2->tail) { - if (net_ratelimit()) - printk(KERN_CRIT "protocol %04x is " - "buggy, dev %s\n", - skb2->protocol, dev->name); - skb_reset_network_header(skb2); - } - - skb2->transport_header = skb2->network_header; - skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype, skb->dev); - } - } - rcu_read_unlock(); -} - - -static inline void __netif_reschedule(struct Qdisc *q) -{ - struct softnet_data *sd; - unsigned long flags; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - q->next_sched = sd->output_queue; - sd->output_queue = q; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); -} - -void __netif_schedule(struct Qdisc *q) -{ - if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) - __netif_reschedule(q); -} -EXPORT_SYMBOL(__netif_schedule); - -void dev_kfree_skb_irq(struct sk_buff *skb) -{ - if (atomic_dec_and_test(&skb->users)) { - struct softnet_data *sd; - unsigned long flags; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - skb->next = sd->completion_queue; - sd->completion_queue = skb; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } -} -EXPORT_SYMBOL(dev_kfree_skb_irq); - -void dev_kfree_skb_any(struct sk_buff *skb) -{ - if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); - else - dev_kfree_skb(skb); -} -EXPORT_SYMBOL(dev_kfree_skb_any); - - -/** - * netif_device_detach - mark device as removed - * @dev: network device - * - * Mark device as removed from system and therefore no longer available. - */ -void netif_device_detach(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_stop_queue(dev); - } -} -EXPORT_SYMBOL(netif_device_detach); - -/** - * netif_device_attach - mark device as attached - * @dev: network device - * - * Mark device as attached from system and restart if needed. 
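dev_kfree_skb_irq() and dev_kfree_skb_any() above exist because a plain kfree_skb() must not run in hard-IRQ context; netif_device_detach()/netif_device_attach() bracket the window in which the hardware is unreachable (suspend, hot-unplug). A driver-side sketch, with the mydrv_* names assumed:

/* TX-completion interrupt of a hypothetical driver */
static void mydrv_tx_done(struct net_device *dev, struct sk_buff *skb)
{
	dev_kfree_skb_irq(skb);		/* defers the free to NET_TX_SOFTIRQ */
}

/* suspend/resume hooks of the same hypothetical driver */
static int mydrv_suspend(struct net_device *dev)
{
	netif_device_detach(dev);	/* stops the queue if the device was running */
	return 0;
}

static int mydrv_resume(struct net_device *dev)
{
	netif_device_attach(dev);	/* restarts queue and watchdog when needed */
	return 0;
}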
- */ -void netif_device_attach(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_wake_queue(dev); - __netdev_watchdog_up(dev); - } -} -EXPORT_SYMBOL(netif_device_attach); - -static bool can_checksum_protocol(unsigned long features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_IP_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_IPV6_CSUM) && - protocol == htons(ETH_P_IPV6))); -} - -static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) -{ - if (can_checksum_protocol(dev->features, skb->protocol)) - return true; - - if (skb->protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - if (can_checksum_protocol(dev->features & dev->vlan_features, - veh->h_vlan_encapsulated_proto)) - return true; - } - - return false; -} - -/* - * Invalidate hardware checksum when packet is to be mangled, and - * complete checksum manually on outgoing path. - */ -int skb_checksum_help(struct sk_buff *skb) -{ - __wsum csum; - int ret = 0, offset; - - if (skb->ip_summed == CHECKSUM_COMPLETE) - goto out_set_summed; - - if (unlikely(skb_shinfo(skb)->gso_size)) { - /* Let GSO fix up the checksum. */ - goto out_set_summed; - } - - offset = skb->csum_start - skb_headroom(skb); - BUG_ON(offset >= skb_headlen(skb)); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - - offset += skb->csum_offset; - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); - - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__sum16))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } - - *(__sum16 *)(skb->data + offset) = csum_fold(csum); -out_set_summed: - skb->ip_summed = CHECKSUM_NONE; -out: - return ret; -} - -/** - * skb_gso_segment - Perform segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. - */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_type *ptype; - __be16 type = skb->protocol; - int err; - - BUG_ON(skb_shinfo(skb)->frag_list); - - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; - __skb_pull(skb, skb->mac_len); - - if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && !ptype->dev && ptype->gso_segment) { - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - err = ptype->gso_send_check(skb); - segs = ERR_PTR(err); - if (err || skb_gso_ok(skb, features)) - break; - __skb_push(skb, (skb->data - - skb_network_header(skb))); - } - segs = ptype->gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - __skb_push(skb, skb->data - skb_mac_header(skb)); - - return segs; -} - -EXPORT_SYMBOL(skb_gso_segment); - -/* Take action when hardware reception checksum errors are detected. */ -#ifdef CONFIG_BUG -void netdev_rx_csum_fault(struct net_device *dev) -{ - if (net_ratelimit()) { - printk(KERN_ERR "%s: hw csum failure.\n", - dev ? 
dev->name : ""); - dump_stack(); - } -} -EXPORT_SYMBOL(netdev_rx_csum_fault); -#endif - -/* Actually, we should eliminate this check as soon as we know, that: - * 1. IOMMU is present and allows to map all the memory. - * 2. No high memory really exists on this machine. - */ - -static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) -{ -#ifdef CONFIG_HIGHMEM - int i; - - if (dev->features & NETIF_F_HIGHDMA) - return 0; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) - return 1; - -#endif - return 0; -} - -struct dev_gso_cb { - void (*destructor)(struct sk_buff *skb); -}; - -#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) - -static void dev_gso_skb_destructor(struct sk_buff *skb) -{ - struct dev_gso_cb *cb; - - do { - struct sk_buff *nskb = skb->next; - - skb->next = nskb->next; - nskb->next = NULL; - kfree_skb(nskb); - } while (skb->next); - - cb = DEV_GSO_CB(skb); - if (cb->destructor) - cb->destructor(skb); -} - -/** - * dev_gso_segment - Perform emulated hardware segmentation on skb. - * @skb: buffer to segment - * - * This function segments the given skb and stores the list of segments - * in skb->next. - */ -static int dev_gso_segment(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct sk_buff *segs; - int features = dev->features & ~(illegal_highdma(dev, skb) ? - NETIF_F_SG : 0); - - segs = skb_gso_segment(skb, features); - - /* Verifying header integrity only. */ - if (!segs) - return 0; - - if (IS_ERR(segs)) - return PTR_ERR(segs); - - skb->next = segs; - DEV_GSO_CB(skb)->destructor = skb->destructor; - skb->destructor = dev_gso_skb_destructor; - - return 0; -} - -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) -{ - if (likely(!skb->next)) { - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - - if (netif_needs_gso(dev, skb)) { - if (unlikely(dev_gso_segment(skb))) - goto out_kfree_skb; - if (skb->next) - goto gso; - } - - return dev->hard_start_xmit(skb, dev); - } - -gso: - do { - struct sk_buff *nskb = skb->next; - int rc; - - skb->next = nskb->next; - nskb->next = NULL; - rc = dev->hard_start_xmit(nskb, dev); - if (unlikely(rc)) { - nskb->next = skb->next; - skb->next = nskb; - return rc; - } - if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) - return NETDEV_TX_BUSY; - } while (skb->next); - - skb->destructor = DEV_GSO_CB(skb)->destructor; - -out_kfree_skb: - kfree_skb(skb); - return 0; -} - -static u32 simple_tx_hashrnd; -static int simple_tx_hashrnd_initialized = 0; - -static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) -{ - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; - - if (unlikely(!simple_tx_hashrnd_initialized)) { - get_random_bytes(&simple_tx_hashrnd, 4); - simple_tx_hashrnd_initialized = 1; - } - - switch (skb->protocol) { - case __constant_htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; - case __constant_htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - return 0; - } - - - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) 
(skb_network_header(skb) + (ihl * 4))); - break; - - default: - ports = 0; - break; - } - - hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); - - return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); -} - -static struct netdev_queue *dev_pick_tx(struct net_device *dev, - struct sk_buff *skb) -{ - u16 queue_index = 0; - - if (dev->select_queue) - queue_index = dev->select_queue(dev, skb); - else if (dev->real_num_tx_queues > 1) - queue_index = simple_tx_hash(dev, skb); - - skb_set_queue_mapping(skb, queue_index); - return netdev_get_tx_queue(dev, queue_index); -} - -/** - * dev_queue_xmit - transmit a buffer - * @skb: buffer to transmit - * - * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling - * this function. The function can be called from an interrupt. - * - * A negative errno code is returned on a failure. A success does not - * guarantee the frame will be transmitted as it may be dropped due - * to congestion or traffic shaping. - * - * ----------------------------------------------------------------------------------- - * I notice this method can also return errors from the queue disciplines, - * including NET_XMIT_DROP, which is a positive value. So, errors can also - * be positive. - * - * Regardless of the return value, the skb is consumed, so it is currently - * difficult to retry a send to this method. (You can bump the ref count - * before sending to hold a reference for retry if you are careful.) - * - * When calling this method, interrupts MUST be enabled. This is because - * the BH enable code must have IRQs enabled so that it will not deadlock. - * --BLG - */ -int dev_queue_xmit(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct netdev_queue *txq; - struct Qdisc *q; - int rc = -ENOMEM; - - /* GSO will handle the following emulations directly. */ - if (netif_needs_gso(dev, skb)) - goto gso; - - if (skb_shinfo(skb)->frag_list && - !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb)) - goto out_kfree_skb; - - /* Fragmented skb is linearized if device does not support SG, - * or if at least one of fragments is in highmem and device - * does not support DMA from it. - */ - if (skb_shinfo(skb)->nr_frags && - (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb)) - goto out_kfree_skb; - - /* If packet is not checksummed and device does not support - * checksumming for this protocol, complete checksumming here. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, skb->csum_start - - skb_headroom(skb)); - if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) - goto out_kfree_skb; - } - -gso: - /* Disable soft irqs for various locks below. Also - * stops preemption for RCU. - */ - rcu_read_lock_bh(); - - txq = dev_pick_tx(dev, skb); - q = rcu_dereference(txq->qdisc); - -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); -#endif - if (q->enqueue) { - spinlock_t *root_lock = qdisc_lock(q); - - spin_lock(root_lock); - - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - kfree_skb(skb); - rc = NET_XMIT_DROP; - } else { - rc = qdisc_enqueue_root(skb, q); - qdisc_run(q); - } - spin_unlock(root_lock); - - goto out; - } - - /* The device has no queue. Common case for software devices: - loopback, all the sorts of tunnels... - - Really, it is unlikely that netif_tx_lock protection is necessary - here. (f.e. 
loopback and IP tunnels are clean ignoring statistics - counters.) - However, it is possible, that they rely on protection - made by us here. - - Check this and shot the lock. It is not prone from deadlocks. - Either shot noqueue qdisc, it is even simpler 8) - */ - if (dev->flags & IFF_UP) { - int cpu = smp_processor_id(); /* ok because BHs are off */ - - if (txq->xmit_lock_owner != cpu) { - - HARD_TX_LOCK(dev, txq, cpu); - - if (!netif_tx_queue_stopped(txq)) { - rc = 0; - if (!dev_hard_start_xmit(skb, dev, txq)) { - HARD_TX_UNLOCK(dev, txq); - goto out; - } - } - HARD_TX_UNLOCK(dev, txq); - if (net_ratelimit()) - printk(KERN_CRIT "Virtual device %s asks to " - "queue packet!\n", dev->name); - } else { - /* Recursion is detected! It is possible, - * unfortunately */ - if (net_ratelimit()) - printk(KERN_CRIT "Dead loop on virtual device " - "%s, fix it urgently!\n", dev->name); - } - } - - rc = -ENETDOWN; - rcu_read_unlock_bh(); - -out_kfree_skb: - kfree_skb(skb); - return rc; -out: - rcu_read_unlock_bh(); - return rc; -} - - -/*======================================================================= - Receiver routines - =======================================================================*/ - -int netdev_max_backlog __read_mostly = 1000; -int netdev_budget __read_mostly = 300; -int weight_p __read_mostly = 64; /* old backlog weight */ - -DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; - - -/** - * netif_rx - post buffer to the network code - * @skb: buffer to post - * - * This function receives a packet from a device driver and queues it for - * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the - * protocol layers. - * - * return values: - * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) - * - */ - -int netif_rx(struct sk_buff *skb) -{ - struct softnet_data *queue; - unsigned long flags; - - /* if netpoll wants it, pretend we never saw it */ - if (netpoll_rx(skb)) - return NET_RX_DROP; - - if (!skb->tstamp.tv64) - net_timestamp(skb); - - /* - * The code is rearranged so that the path is the most - * short when CPU is congested, but is still operating. 
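dev_queue_xmit() above is the entry point the protocol layers use once an skb is fully built; as its comment notes, the skb is consumed whatever the return value, and errors may be either negative errnos or positive NET_XMIT_* codes from the qdisc. A minimal in-kernel sender, assuming the skb has already been allocated and filled elsewhere:

int rc;

skb->dev = dev;					/* caller selects the outgoing device */
skb->protocol = __constant_htons(ETH_P_IP);	/* illustrative protocol value */

rc = dev_queue_xmit(skb);	/* consumes skb; 0 on success, a positive
				 * NET_XMIT_* code or a negative errno otherwise */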
- */ - local_irq_save(flags); - queue = &__get_cpu_var(softnet_data); - - __get_cpu_var(netdev_rx_stat).total++; - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { - if (queue->input_pkt_queue.qlen) { -enqueue: - __skb_queue_tail(&queue->input_pkt_queue, skb); - local_irq_restore(flags); - return NET_RX_SUCCESS; - } - - napi_schedule(&queue->backlog); - goto enqueue; - } - - __get_cpu_var(netdev_rx_stat).dropped++; - local_irq_restore(flags); - - kfree_skb(skb); - return NET_RX_DROP; -} - -int netif_rx_ni(struct sk_buff *skb) -{ - int err; - - preempt_disable(); - err = netif_rx(skb); - if (local_softirq_pending()) - do_softirq(); - preempt_enable(); - - return err; -} - -EXPORT_SYMBOL(netif_rx_ni); - -static void net_tx_action(struct softirq_action *h) -{ - struct softnet_data *sd = &__get_cpu_var(softnet_data); - - if (sd->completion_queue) { - struct sk_buff *clist; - - local_irq_disable(); - clist = sd->completion_queue; - sd->completion_queue = NULL; - local_irq_enable(); - - while (clist) { - struct sk_buff *skb = clist; - clist = clist->next; - - WARN_ON(atomic_read(&skb->users)); - __kfree_skb(skb); - } - } - - if (sd->output_queue) { - struct Qdisc *head; - - local_irq_disable(); - head = sd->output_queue; - sd->output_queue = NULL; - local_irq_enable(); - - while (head) { - struct Qdisc *q = head; - spinlock_t *root_lock; - - head = head->next_sched; - - root_lock = qdisc_lock(q); - if (spin_trylock(root_lock)) { - smp_mb__before_clear_bit(); - clear_bit(__QDISC_STATE_SCHED, - &q->state); - qdisc_run(q); - spin_unlock(root_lock); - } else { - if (!test_bit(__QDISC_STATE_DEACTIVATED, - &q->state)) { - __netif_reschedule(q); - } else { - smp_mb__before_clear_bit(); - clear_bit(__QDISC_STATE_SCHED, - &q->state); - } - } - } - } -} - -static inline int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev, - struct net_device *orig_dev) -{ - atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); -} - -#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) -/* These hooks defined here for ATM */ -struct net_bridge; -struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, - unsigned char *addr); -void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly; - -/* - * If bridge module is loaded call bridging hook. - * returns NULL if packet was consumed. 
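netif_rx() above is the non-NAPI receive entry point: it time-stamps the buffer if needed and appends it to the per-CPU backlog, which process_backlog() later drains. A hypothetical driver receive path might look like the sketch below, where pkt_len and rx_buf stand in for driver-specific state.

/* inside the RX interrupt or tasklet of a hypothetical non-NAPI driver */
struct sk_buff *skb;

skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN);
if (!skb) {
	dev->stats.rx_dropped++;
	return;
}
skb_reserve(skb, NET_IP_ALIGN);			/* keep the IP header aligned */
memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);	/* copy out of the device buffer */
skb->protocol = eth_type_trans(skb, dev);	/* also sets skb->dev and pkt_type */
netif_rx(skb);					/* queue to the softnet backlog */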
- */ -struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, - struct sk_buff *skb) __read_mostly; -static inline struct sk_buff *handle_bridge(struct sk_buff *skb, - struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) -{ - struct net_bridge_port *port; - - if (skb->pkt_type == PACKET_LOOPBACK || - (port = rcu_dereference(skb->dev->br_port)) == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - - return br_handle_frame_hook(port, skb); -} -#else -#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) -struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); - -static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, - struct net_device *orig_dev) -{ - if (skb->dev->macvlan_port == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - return macvlan_handle_frame_hook(skb); -} -#else -#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#ifdef CONFIG_NET_CLS_ACT -/* TODO: Maybe we should just force sch_ingress to be compiled in - * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions - * a compare and 2 stores extra right now if we dont have it on - * but have CONFIG_NET_CLS_ACT - * NOTE: This doesnt stop any functionality; if you dont have - * the ingress scheduler, you just cant add policies on ingress. - * - */ -static int ing_filter(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - u32 ttl = G_TC_RTTL(skb->tc_verd); - struct netdev_queue *rxq; - int result = TC_ACT_OK; - struct Qdisc *q; - - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING - "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, dev->ifindex); - return TC_ACT_SHOT; - } - - skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - - rxq = &dev->rx_queue; - - q = rxq->qdisc; - if (q != &noop_qdisc) { - spin_lock(qdisc_lock(q)); - if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) - result = qdisc_enqueue_root(skb, q); - spin_unlock(qdisc_lock(q)); - } - - return result; -} - -static inline struct sk_buff *handle_ing(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, struct net_device *orig_dev) -{ - if (skb->dev->rx_queue.qdisc == &noop_qdisc) - goto out; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } else { - /* Huh? Why does turning on AF_PACKET affect this? */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - } - - switch (ing_filter(skb)) { - case TC_ACT_SHOT: - case TC_ACT_STOLEN: - kfree_skb(skb); - return NULL; - } - -out: - skb->tc_verd = 0; - return skb; -} -#endif - -/* - * netif_nit_deliver - deliver received packets to network taps - * @skb: buffer - * - * This function is used to deliver incoming packets to network - * taps. It should be used when the normal netif_receive_skb path - * is bypassed, for example because of VLAN acceleration. 
- */ -void netif_nit_deliver(struct sk_buff *skb) -{ - struct packet_type *ptype; - - if (list_empty(&ptype_all)) - return; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) - deliver_skb(skb, ptype, skb->dev); - } - rcu_read_unlock(); -} - -/* The code already makes the assumption that packet handlers run - * sequentially on the same CPU. -Sapan */ -DEFINE_PER_CPU(int, sknid_elevator) = 0; - -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) -{ - struct packet_type *ptype, *pt_prev; - struct net_device *orig_dev; - struct net_device *null_or_orig; - int ret = NET_RX_DROP; - int *cur_elevator = &__get_cpu_var(sknid_elevator); - __be16 type; - - *cur_elevator = 0; - - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; - - if (!skb->tstamp.tv64) - net_timestamp(skb); - - if (!skb->iif) - skb->iif = skb->dev->ifindex; - - null_or_orig = NULL; - orig_dev = skb->dev; - if (orig_dev->master) { - if (skb_bond_should_drop(skb)) - null_or_orig = orig_dev; /* deliver only exact match */ - else - skb->dev = orig_dev->master; - } - - __get_cpu_var(netdev_rx_stat).total++; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; - - pt_prev = NULL; - - rcu_read_lock(); - - /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) - goto out; - -#ifdef CONFIG_NET_CLS_ACT - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - goto ncls; - } -#endif - - list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } - } - -#ifdef CONFIG_NET_CLS_ACT - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; -ncls: -#endif - - skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - - type = skb->protocol; - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } - } - - if (pt_prev) { - /* At this point, cur_elevator may be -2 or a positive value, in - * case a previous protocol handler marked it */ - if (*cur_elevator) { - atomic_inc(&skb->users); - } - - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); - - if ((*cur_elevator)>0) { - skb->skb_tag = *cur_elevator; - list_for_each_entry_rcu(ptype, &ptype_all, list) { - if ((!ptype->dev || ptype->dev == skb->dev) && (ptype->sknid_elevator)) { - ret = deliver_skb(skb, ptype, orig_dev); - } - } 
- } - - if (*cur_elevator) { - /* We have a packet */ - kfree_skb(skb); - } - } else { - kfree_skb(skb); - /* Jamal, now you will not able to escape explaining - * me how you were going to use this. :-) - */ - ret = NET_RX_DROP; - } - -out: - rcu_read_unlock(); - return ret; -} - -/* Network device is going away, flush any packets still pending */ -static void flush_backlog(void *arg) -{ - struct net_device *dev = arg; - struct softnet_data *queue = &__get_cpu_var(softnet_data); - struct sk_buff *skb, *tmp; - - skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) - if (skb->dev == dev) { - __skb_unlink(skb, &queue->input_pkt_queue); - kfree_skb(skb); - } -} - -static int process_backlog(struct napi_struct *napi, int quota) -{ - int work = 0; - struct softnet_data *queue = &__get_cpu_var(softnet_data); - unsigned long start_time = jiffies; - - napi->weight = weight_p; - do { - struct sk_buff *skb; - - local_irq_disable(); - skb = __skb_dequeue(&queue->input_pkt_queue); - if (!skb) { - __napi_complete(napi); - local_irq_enable(); - break; - } - local_irq_enable(); - - netif_receive_skb(skb); - } while (++work < quota && jiffies == start_time); - - return work; -} - -/** - * __napi_schedule - schedule for receive - * @n: entry to schedule - * - * The entry's receive function will be scheduled to run - */ -void __napi_schedule(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); -} -EXPORT_SYMBOL(__napi_schedule); - - -static void net_rx_action(struct softirq_action *h) -{ - struct list_head *list = &__get_cpu_var(softnet_data).poll_list; - unsigned long start_time = jiffies; - int budget = netdev_budget; - void *have; - - local_irq_disable(); - - while (!list_empty(list)) { - struct napi_struct *n; - int work, weight; - - /* If softirq window is exhuasted then punt. - * - * Note that this is a slight policy change from the - * previous NAPI code, which would allow up to 2 - * jiffies to pass before breaking out. The test - * used to be "jiffies - start_time > 1". - */ - if (unlikely(budget <= 0 || jiffies != start_time)) - goto softnet_break; - - local_irq_enable(); - - /* Even though interrupts have been re-enabled, this - * access is safe because interrupts can only add new - * entries to the tail of this list, and only ->poll() - * calls can remove this head entry from the list. - */ - n = list_entry(list->next, struct napi_struct, poll_list); - - have = netpoll_poll_lock(n); - - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race - * with netpoll's poll_napi(). Only the entity which - * obtains the lock and sees NAPI_STATE_SCHED set will - * actually make the ->poll() call. Therefore we avoid - * accidently calling ->poll() when NAPI is not scheduled. - */ - work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) - work = n->poll(n, weight); - - WARN_ON_ONCE(work > weight); - - budget -= work; - - local_irq_disable(); - - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can - * move the instance around on the list at-will. 
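process_backlog() and net_rx_action() above define the contract a NAPI ->poll() callback must honour: handle at most the given budget, return the amount of work done, and only complete the NAPI context when the device ring was drained below budget. A driver-side sketch, with mydrv_rx_next() and mydrv_rx_irq_enable() as assumed helpers:

static int mydrv_poll(struct napi_struct *napi, int budget)
{
	int work = 0;

	/* mydrv_rx_next() (assumed) returns a fully set-up skb,
	 * or NULL once the device ring is empty */
	while (work < budget) {
		struct sk_buff *skb = mydrv_rx_next(napi);

		if (!skb)
			break;
		netif_receive_skb(skb);
		work++;
	}

	if (work < budget) {
		/* ring drained: complete the NAPI context (see __napi_complete()
		 * above for the core primitive) and re-enable the RX interrupt */
		mydrv_rx_irq_enable(napi);	/* assumed helper */
	}
	return work;	/* net_rx_action() compares this against the weight */
}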
- */ - if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) - __napi_complete(n); - else - list_move_tail(&n->poll_list, list); - } - - netpoll_poll_unlock(have); - } -out: - local_irq_enable(); - -#ifdef CONFIG_NET_DMA - /* - * There may not be any more sk_buffs coming right now, so push - * any pending DMA copies to hardware - */ - if (!cpus_empty(net_dma.channel_mask)) { - int chan_idx; - for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { - struct dma_chan *chan = net_dma.channels[chan_idx]; - if (chan) - dma_async_memcpy_issue_pending(chan); - } - } -#endif - - return; - -softnet_break: - __get_cpu_var(netdev_rx_stat).time_squeeze++; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - goto out; -} - -static gifconf_func_t * gifconf_list [NPROTO]; - -/** - * register_gifconf - register a SIOCGIF handler - * @family: Address family - * @gifconf: Function handler - * - * Register protocol dependent address dumping routines. The handler - * that is passed must not be freed or reused until it has been replaced - * by another handler. - */ -int register_gifconf(unsigned int family, gifconf_func_t * gifconf) -{ - if (family >= NPROTO) - return -EINVAL; - gifconf_list[family] = gifconf; - return 0; -} - - -/* - * Map an interface index to its name (SIOCGIFNAME) - */ - -/* - * We need this ioctl for efficient implementation of the - * if_indextoname() function required by the IPv6 API. Without - * it, we would have to search all the interfaces to find a - * match. --pb - */ - -static int dev_ifname(struct net *net, struct ifreq __user *arg) -{ - struct net_device *dev; - struct ifreq ifr; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifr.ifr_ifindex); - if (!dev) { - read_unlock(&dev_base_lock); - return -ENODEV; - } - - strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; -} - -/* - * Perform a SIOCGIFCONF call. This structure will change - * size eventually, and there is nothing I can do about it. - * Thus we will need a 'compatibility mode'. - */ - -static int dev_ifconf(struct net *net, char __user *arg) -{ - struct ifconf ifc; - struct net_device *dev; - char __user *pos; - int len; - int total; - int i; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; - - /* - * Loop over the interfaces, and write an info block for each. - */ - - total = 0; - for_each_netdev(net, dev) { - if (!nx_dev_visible(current->nx_info, dev)) - continue; - for (i = 0; i < NPROTO; i++) { - if (gifconf_list[i]) { - int done; - if (!pos) - done = gifconf_list[i](dev, NULL, 0); - else - done = gifconf_list[i](dev, pos + total, - len - total); - if (done < 0) - return -EFAULT; - total += done; - } - } - } - - /* - * All done. Write the updated control block back to the caller. - */ - ifc.ifc_len = total; - - /* - * Both BSD and Solaris return 0 here, so we do too. - */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; -} - -#ifdef CONFIG_PROC_FS -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. 
- */ -void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) -{ - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; - - read_lock(&dev_base_lock); - if (!*pos) - return SEQ_START_TOKEN; - - off = 1; - for_each_netdev(net, dev) - if (off++ == *pos) - return dev; - - return NULL; -} - -void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - ++*pos; - return v == SEQ_START_TOKEN ? - first_net_device(net) : next_net_device((struct net_device *)v); -} - -void dev_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) -{ - read_unlock(&dev_base_lock); -} - -static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -{ - struct net_device_stats *stats = dev->get_stats(dev); - - if (!nx_dev_visible(current->nx_info, dev)) - return; - - seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " - "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); -} - -/* - * Called from the PROCfs module. This now uses the new arbitrary sized - * /proc/net interface to create /proc/net/dev - */ -static int dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); - else - dev_seq_printf_stats(seq, v); - return 0; -} - -static struct netif_rx_stats *softnet_get_online(loff_t *pos) -{ - struct netif_rx_stats *rc = NULL; - - while (*pos < nr_cpu_ids) - if (cpu_online(*pos)) { - rc = &per_cpu(netdev_rx_stat, *pos); - break; - } else - ++*pos; - return rc; -} - -static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -{ - return softnet_get_online(pos); -} - -static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return softnet_get_online(pos); -} - -static void softnet_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int softnet_seq_show(struct seq_file *seq, void *v) -{ - struct netif_rx_stats *s = v; - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - s->total, s->dropped, s->time_squeeze, 0, - 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision ); - return 0; -} - -static const struct seq_operations dev_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_seq_show, -}; - -static int dev_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static const struct seq_operations softnet_seq_ops = { - .start = softnet_seq_start, - .next = softnet_seq_next, - .stop = softnet_seq_stop, - .show = softnet_seq_show, -}; - -static int softnet_seq_open(struct inode *inode, 
struct file *file) -{ - return seq_open(file, &softnet_seq_ops); -} - -static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *ptype_get_idx(loff_t pos) -{ - struct packet_type *pt = NULL; - loff_t i = 0; - int t; - - list_for_each_entry_rcu(pt, &ptype_all, list) { - if (i == pos) - return pt; - ++i; - } - - for (t = 0; t < PTYPE_HASH_SIZE; t++) { - list_for_each_entry_rcu(pt, &ptype_base[t], list) { - if (i == pos) - return pt; - ++i; - } - } - return NULL; -} - -static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; -} - -static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct packet_type *pt; - struct list_head *nxt; - int hash; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); - - pt = v; - nxt = pt->list.next; - if (pt->type == htons(ETH_P_ALL)) { - if (nxt != &ptype_all) - goto found; - hash = 0; - nxt = ptype_base[0].next; - } else - hash = ntohs(pt->type) & PTYPE_HASH_MASK; - - while (nxt == &ptype_base[hash]) { - if (++hash >= PTYPE_HASH_SIZE) - return NULL; - nxt = ptype_base[hash].next; - } -found: - return list_entry(nxt, struct packet_type, list); -} - -static void ptype_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static void ptype_seq_decode(struct seq_file *seq, void *sym) -{ -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0, symsize; - const char *symname; - char *modname; - char namebuf[128]; - - symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset, - &modname, namebuf); - - if (symname) { - char *delim = ":"; - - if (!modname) - modname = delim = ""; - seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim, - symname, offset); - return; - } -#endif - - seq_printf(seq, "[%p]", sym); -} - -static int ptype_seq_show(struct seq_file *seq, void *v) -{ - struct packet_type *pt = v; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { - if (pt->type == htons(ETH_P_ALL)) - seq_puts(seq, "ALL "); - else - seq_printf(seq, "%04x", ntohs(pt->type)); - - seq_printf(seq, " %-8s ", - pt->dev ? 
pt->dev->name : ""); - ptype_seq_decode(seq, pt->func); - seq_putc(seq, '\n'); - } - - return 0; -} - -static const struct seq_operations ptype_seq_ops = { - .start = ptype_seq_start, - .next = ptype_seq_next, - .stop = ptype_seq_stop, - .show = ptype_seq_show, -}; - -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - -static int __net_init dev_proc_net_init(struct net *net) -{ - int rc = -ENOMEM; - - if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) - goto out; - if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) - goto out_dev; - if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) - goto out_softnet; - - if (wext_proc_init(net)) - goto out_ptype; - rc = 0; -out: - return rc; -out_ptype: - proc_net_remove(net, "ptype"); -out_softnet: - proc_net_remove(net, "softnet_stat"); -out_dev: - proc_net_remove(net, "dev"); - goto out; -} - -static void __net_exit dev_proc_net_exit(struct net *net) -{ - wext_proc_exit(net); - - proc_net_remove(net, "ptype"); - proc_net_remove(net, "softnet_stat"); - proc_net_remove(net, "dev"); -} - -static struct pernet_operations __net_initdata dev_proc_ops = { - .init = dev_proc_net_init, - .exit = dev_proc_net_exit, -}; - -static int __init dev_proc_init(void) -{ - return register_pernet_subsys(&dev_proc_ops); -} -#else -#define dev_proc_init() 0 -#endif /* CONFIG_PROC_FS */ - - -/** - * netdev_set_master - set up master/slave pair - * @slave: slave device - * @master: new master device - * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success the reference counts - * are adjusted, %RTM_NEWLINK is sent to the routing socket and the - * function returns zero. - */ -int netdev_set_master(struct net_device *slave, struct net_device *master) -{ - struct net_device *old = slave->master; - - ASSERT_RTNL(); - - if (master) { - if (old) - return -EBUSY; - dev_hold(master); - } - - slave->master = master; - - synchronize_net(); - - if (old) - dev_put(old); - - if (master) - slave->flags |= IFF_SLAVE; - else - slave->flags &= ~IFF_SLAVE; - - rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); - return 0; -} - -static void dev_change_rx_flags(struct net_device *dev, int flags) -{ - if (dev->flags & IFF_UP && dev->change_rx_flags) - dev->change_rx_flags(dev, flags); -} - -static int __dev_set_promiscuity(struct net_device *dev, int inc) -{ - unsigned short old_flags = dev->flags; - - ASSERT_RTNL(); - - dev->flags |= IFF_PROMISC; - dev->promiscuity += inc; - if (dev->promiscuity == 0) { - /* - * Avoid overflow. - * If inc causes overflow, untouch promisc and return error. - */ - if (inc < 0) - dev->flags &= ~IFF_PROMISC; - else { - dev->promiscuity -= inc; - printk(KERN_WARNING "%s: promiscuity touches roof, " - "set promiscuity failed, promiscuity feature " - "of device might be broken.\n", dev->name); - return -EOVERFLOW; - } - } - if (dev->flags != old_flags) { - printk(KERN_INFO "device %s %s promiscuous mode\n", - dev->name, (dev->flags & IFF_PROMISC) ? 
"entered" : - "left"); - if (audit_enabled) - audit_log(current->audit_context, GFP_ATOMIC, - AUDIT_ANOM_PROMISCUOUS, - "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", - dev->name, (dev->flags & IFF_PROMISC), - (old_flags & IFF_PROMISC), - audit_get_loginuid(current), - current->uid, current->gid, - audit_get_sessionid(current)); - - dev_change_rx_flags(dev, IFF_PROMISC); - } - return 0; -} - -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. - */ -int dev_set_promiscuity(struct net_device *dev, int inc) -{ - unsigned short old_flags = dev->flags; - int err; - - err = __dev_set_promiscuity(dev, inc); - if (err < 0) - return err; - if (dev->flags != old_flags) - dev_set_rx_mode(dev); - return err; -} - -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. - */ - -int dev_set_allmulti(struct net_device *dev, int inc) -{ - unsigned short old_flags = dev->flags; - - ASSERT_RTNL(); - - dev->flags |= IFF_ALLMULTI; - dev->allmulti += inc; - if (dev->allmulti == 0) { - /* - * Avoid overflow. - * If inc causes overflow, untouch allmulti and return error. - */ - if (inc < 0) - dev->flags &= ~IFF_ALLMULTI; - else { - dev->allmulti -= inc; - printk(KERN_WARNING "%s: allmulti touches roof, " - "set allmulti failed, allmulti feature of " - "device might be broken.\n", dev->name); - return -EOVERFLOW; - } - } - if (dev->flags ^ old_flags) { - dev_change_rx_flags(dev, IFF_ALLMULTI); - dev_set_rx_mode(dev); - } - return 0; -} - -/* - * Upload unicast and multicast address lists to device and - * configure RX filtering. When the device doesn't support unicast - * filtering it is put in promiscuous mode while unicast addresses - * are present. - */ -void __dev_set_rx_mode(struct net_device *dev) -{ - /* dev_open will call this function so the list will stay sane. */ - if (!(dev->flags&IFF_UP)) - return; - - if (!netif_device_present(dev)) - return; - - if (dev->set_rx_mode) - dev->set_rx_mode(dev); - else { - /* Unicast addresses changes may only happen under the rtnl, - * therefore calling __dev_set_promiscuity here is safe. 
- */ - if (dev->uc_count > 0 && !dev->uc_promisc) { - __dev_set_promiscuity(dev, 1); - dev->uc_promisc = 1; - } else if (dev->uc_count == 0 && dev->uc_promisc) { - __dev_set_promiscuity(dev, -1); - dev->uc_promisc = 0; - } - - if (dev->set_multicast_list) - dev->set_multicast_list(dev); - } -} - -void dev_set_rx_mode(struct net_device *dev) -{ - netif_addr_lock_bh(dev); - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); -} - -int __dev_addr_delete(struct dev_addr_list **list, int *count, - void *addr, int alen, int glbl) -{ - struct dev_addr_list *da; - - for (; (da = *list) != NULL; list = &da->next) { - if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && - alen == da->da_addrlen) { - if (glbl) { - int old_glbl = da->da_gusers; - da->da_gusers = 0; - if (old_glbl == 0) - break; - } - if (--da->da_users) - return 0; - - *list = da->next; - kfree(da); - (*count)--; - return 0; - } - } - return -ENOENT; -} - -int __dev_addr_add(struct dev_addr_list **list, int *count, - void *addr, int alen, int glbl) -{ - struct dev_addr_list *da; - - for (da = *list; da != NULL; da = da->next) { - if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && - da->da_addrlen == alen) { - if (glbl) { - int old_glbl = da->da_gusers; - da->da_gusers = 1; - if (old_glbl) - return 0; - } - da->da_users++; - return 0; - } - } - - da = kzalloc(sizeof(*da), GFP_ATOMIC); - if (da == NULL) - return -ENOMEM; - memcpy(da->da_addr, addr, alen); - da->da_addrlen = alen; - da->da_users = 1; - da->da_gusers = glbl ? 1 : 0; - da->next = *list; - *list = da; - (*count)++; - return 0; -} - -/** - * dev_unicast_delete - Release secondary unicast address. - * @dev: device - * @addr: address to delete - * @alen: length of @addr - * - * Release reference to a secondary unicast address and remove it - * from the device if the reference count drops to zero. - * - * The caller must hold the rtnl_mutex. - */ -int dev_unicast_delete(struct net_device *dev, void *addr, int alen) -{ - int err; - - ASSERT_RTNL(); - - netif_addr_lock_bh(dev); - err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); - if (!err) - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); - return err; -} -EXPORT_SYMBOL(dev_unicast_delete); - -/** - * dev_unicast_add - add a secondary unicast address - * @dev: device - * @addr: address to add - * @alen: length of @addr - * - * Add a secondary unicast address to the device or increase - * the reference count if it already exists. - * - * The caller must hold the rtnl_mutex. 
- */ -int dev_unicast_add(struct net_device *dev, void *addr, int alen) -{ - int err; - - ASSERT_RTNL(); - - netif_addr_lock_bh(dev); - err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); - if (!err) - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); - return err; -} -EXPORT_SYMBOL(dev_unicast_add); - -int __dev_addr_sync(struct dev_addr_list **to, int *to_count, - struct dev_addr_list **from, int *from_count) -{ - struct dev_addr_list *da, *next; - int err = 0; - - da = *from; - while (da != NULL) { - next = da->next; - if (!da->da_synced) { - err = __dev_addr_add(to, to_count, - da->da_addr, da->da_addrlen, 0); - if (err < 0) - break; - da->da_synced = 1; - da->da_users++; - } else if (da->da_users == 1) { - __dev_addr_delete(to, to_count, - da->da_addr, da->da_addrlen, 0); - __dev_addr_delete(from, from_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } - return err; -} - -void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, - struct dev_addr_list **from, int *from_count) -{ - struct dev_addr_list *da, *next; - - da = *from; - while (da != NULL) { - next = da->next; - if (da->da_synced) { - __dev_addr_delete(to, to_count, - da->da_addr, da->da_addrlen, 0); - da->da_synced = 0; - __dev_addr_delete(from, from_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } -} - -/** - * dev_unicast_sync - Synchronize device's unicast list to another device - * @to: destination device - * @from: source device - * - * Add newly added addresses to the destination device and release - * addresses that have no users left. The source device must be - * locked by netif_tx_lock_bh. - * - * This function is intended to be called from the dev->set_rx_mode - * function of layered software devices. - */ -int dev_unicast_sync(struct net_device *to, struct net_device *from) -{ - int err = 0; - - netif_addr_lock_bh(to); - err = __dev_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); - if (!err) - __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); - return err; -} -EXPORT_SYMBOL(dev_unicast_sync); - -/** - * dev_unicast_unsync - Remove synchronized addresses from the destination device - * @to: destination device - * @from: source device - * - * Remove all addresses that were added to the destination device by - * dev_unicast_sync(). This function is intended to be called from the - * dev->stop function of layered software devices. - */ -void dev_unicast_unsync(struct net_device *to, struct net_device *from) -{ - netif_addr_lock_bh(from); - netif_addr_lock(to); - - __dev_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); - __dev_set_rx_mode(to); - - netif_addr_unlock(to); - netif_addr_unlock_bh(from); -} -EXPORT_SYMBOL(dev_unicast_unsync); - -static void __dev_addr_discard(struct dev_addr_list **list) -{ - struct dev_addr_list *tmp; - - while (*list != NULL) { - tmp = *list; - *list = tmp->next; - if (tmp->da_users > tmp->da_gusers) - printk("__dev_addr_discard: address leakage! 
" - "da_users=%d\n", tmp->da_users); - kfree(tmp); - } -} - -static void dev_addr_discard(struct net_device *dev) -{ - netif_addr_lock_bh(dev); - - __dev_addr_discard(&dev->uc_list); - dev->uc_count = 0; - - __dev_addr_discard(&dev->mc_list); - dev->mc_count = 0; - - netif_addr_unlock_bh(dev); -} - -unsigned dev_get_flags(const struct net_device *dev) -{ - unsigned flags; - - flags = (dev->flags & ~(IFF_PROMISC | - IFF_ALLMULTI | - IFF_RUNNING | - IFF_LOWER_UP | - IFF_DORMANT)) | - (dev->gflags & (IFF_PROMISC | - IFF_ALLMULTI)); - - if (netif_running(dev)) { - if (netif_oper_up(dev)) - flags |= IFF_RUNNING; - if (netif_carrier_ok(dev)) - flags |= IFF_LOWER_UP; - if (netif_dormant(dev)) - flags |= IFF_DORMANT; - } - - return flags; -} - -int dev_change_flags(struct net_device *dev, unsigned flags) -{ - int ret, changes; - int old_flags = dev->flags; - - ASSERT_RTNL(); - - /* - * Set the flags on our device. - */ - - dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | - IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | - IFF_AUTOMEDIA)) | - (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | - IFF_ALLMULTI)); - - /* - * Load in the correct multicast list now the flags have changed. - */ - - if ((old_flags ^ flags) & IFF_MULTICAST) - dev_change_rx_flags(dev, IFF_MULTICAST); - - dev_set_rx_mode(dev); - - /* - * Have we downed the interface. We handle IFF_UP ourselves - * according to user attempts to set it, rather than blindly - * setting it. - */ - - ret = 0; - if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ - ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); - - if (!ret) - dev_set_rx_mode(dev); - } - - if (dev->flags & IFF_UP && - ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | - IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); - - if ((flags ^ dev->gflags) & IFF_PROMISC) { - int inc = (flags & IFF_PROMISC) ? +1 : -1; - dev->gflags ^= IFF_PROMISC; - dev_set_promiscuity(dev, inc); - } - - /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI - is important. Some (broken) drivers set IFF_PROMISC, when - IFF_ALLMULTI is requested not asking us and not reporting. - */ - if ((flags ^ dev->gflags) & IFF_ALLMULTI) { - int inc = (flags & IFF_ALLMULTI) ? +1 : -1; - dev->gflags ^= IFF_ALLMULTI; - dev_set_allmulti(dev, inc); - } - - /* Exclude state transition flags, already notified */ - changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); - if (changes) - rtmsg_ifinfo(RTM_NEWLINK, dev, changes); - - return ret; -} - -int dev_set_mtu(struct net_device *dev, int new_mtu) -{ - int err; - - if (new_mtu == dev->mtu) - return 0; - - /* MTU must be positive. 
*/ - if (new_mtu < 0) - return -EINVAL; - - if (!netif_device_present(dev)) - return -ENODEV; - - err = 0; - if (dev->change_mtu) - err = dev->change_mtu(dev, new_mtu); - else - dev->mtu = new_mtu; - if (!err && dev->flags & IFF_UP) - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); - return err; -} - -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) -{ - int err; - - if (!dev->set_mac_address) - return -EOPNOTSUPP; - if (sa->sa_family != dev->type) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - err = dev->set_mac_address(dev, sa); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; -} - -/* - * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) - */ -static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - - if (!dev) - return -ENODEV; - - switch (cmd) { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = dev_get_flags(dev); - return 0; - - case SIOCGIFMETRIC: /* Get the metric on the interface - (currently unused) */ - ifr->ifr_metric = 0; - return 0; - - case SIOCGIFMTU: /* Get the MTU of a device */ - ifr->ifr_mtu = dev->mtu; - return 0; - - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; - - case SIOCGIFSLAVE: - err = -EINVAL; - break; - - case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; - - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - - default: - /* dev_ioctl() should ensure this case - * is never reached - */ - WARN_ON(1); - err = -EINVAL; - break; - - } - return err; -} - -/* - * Perform the SIOCxIFxxx calls, inside rtnl_lock() - */ -static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - - if (!dev) - return -ENODEV; - - switch (cmd) { - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return 0; - - case SIOCSIFMAP: - if (dev->set_config) { - if (!netif_device_present(dev)) - return -ENODEV; - return dev->set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; - - case SIOCADDMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); - - case SIOCDELMULTI: - if ((!dev->set_multicast_list && 
!dev->set_rx_mode) || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); - - case SIOCSIFTXQLEN: - if (ifr->ifr_qlen < 0) - return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; - return 0; - - case SIOCSIFNAME: - ifr->ifr_newname[IFNAMSIZ-1] = '\0'; - return dev_change_name(dev, ifr->ifr_newname); - - /* - * Unknown or private ioctl - */ - - default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCBRADDIF || - cmd == SIOCBRDELIF || - cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (dev->do_ioctl) { - if (netif_device_present(dev)) - err = dev->do_ioctl(dev, ifr, - cmd); - else - err = -ENODEV; - } - } else - err = -EINVAL; - - } - return err; -} - -/* - * This function handles all "interface"-type I/O control requests. The actual - * 'doing' part of this is dev_ifsioc above. - */ - -/** - * dev_ioctl - network device ioctl - * @net: the applicable net namespace - * @cmd: command to issue - * @arg: pointer to a struct ifreq in user space - * - * Issue ioctl functions to devices. This is normally called by the - * user space syscall interfaces but can sometimes be useful for - * other purposes. The return value is the return from the syscall if - * positive or a negative errno code on error. - */ - -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) -{ - struct ifreq ifr; - int ret; - char *colon; - - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } - if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - colon = strchr(ifr.ifr_name, ':'); - if (colon) - *colon = 0; - - /* - * See which interface the caller is talking about. - */ - - switch (cmd) { - /* - * These ioctl calls: - * - can be done by all. - * - atomic and do not require locking. - * - return a value - */ - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFHWADDR: - case SIOCGIFSLAVE: - case SIOCGIFMAP: - case SIOCGIFINDEX: - case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); - read_lock(&dev_base_lock); - ret = dev_ifsioc_locked(net, &ifr, cmd); - read_unlock(&dev_base_lock); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ethtool(net, &ifr); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. 
- * - return a value - */ - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSIFNAME: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFFLAGS: - case SIOCSIFMETRIC: - case SIOCSIFMTU: - case SIOCSIFMAP: - case SIOCSIFHWADDR: - case SIOCSIFSLAVE: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCSIFHWBROADCAST: - case SIOCSIFTXQLEN: - case SIOCSMIIREG: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - return ret; - - case SIOCGIFMEM: - /* Get the per device memory space. We can add this but - * currently do not support it */ - case SIOCSIFMEM: - /* Set the per device memory buffer space. - * Not applicable in our case */ - case SIOCSIFLINK: - return -EINVAL; - - /* - * Unknown or private ioctl. - */ - default: - if (cmd == SIOCWANDEV || - (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - return ret; - } - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(net, &ifr, cmd, arg); - return -EINVAL; - } -} - - -/** - * dev_new_index - allocate an ifindex - * @net: the applicable net namespace - * - * Returns a suitable unique value for a new device interface - * number. The caller must hold the rtnl semaphore or the - * dev_base_lock to be sure it remains unique. - */ -static int dev_new_index(struct net *net) -{ - static int ifindex; - for (;;) { - if (++ifindex <= 0) - ifindex = 1; - if (!__dev_get_by_index(net, ifindex)) - return ifindex; - } -} - -/* Delayed registration/unregisteration */ -static LIST_HEAD(net_todo_list); - -static void net_set_todo(struct net_device *dev) -{ - list_add_tail(&dev->todo_list, &net_todo_list); -} - -static void rollback_registered(struct net_device *dev) -{ - BUG_ON(dev_boot_phase); - ASSERT_RTNL(); - - /* Some devices call without registering for initialization unwind. */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); - - WARN_ON(1); - return; - } - - BUG_ON(dev->reg_state != NETREG_REGISTERED); - - /* If device is running, close it first. */ - dev_close(dev); - - /* And unlink it from device chain. */ - unlist_netdevice(dev); - - dev->reg_state = NETREG_UNREGISTERING; - - synchronize_net(); - - /* Shutdown queueing discipline. */ - dev_shutdown(dev); - - - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - /* - * Flush the unicast and multicast chains - */ - dev_addr_discard(dev); - - if (dev->uninit) - dev->uninit(dev); - - /* Notifier chain MUST detach us from master device. 
*/ - WARN_ON(dev->master); - - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); - - synchronize_net(); - - dev_put(dev); -} - -static void __netdev_init_queue_locks_one(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_unused) -{ - spin_lock_init(&dev_queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); - dev_queue->xmit_lock_owner = -1; -} - -static void netdev_init_queue_locks(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); - __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); -} - -/** - * register_netdevice - register a network device - * @dev: device to register - * - * Take a completed network device structure and add it to the kernel - * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier - * chain. 0 is returned on success. A negative errno code is returned - * on a failure to set up the device, or if the name is a duplicate. - * - * Callers must hold the rtnl semaphore. You may want - * register_netdev() instead of this. - * - * BUGS: - * The locking appears insufficient to guarantee two parallel registers - * will not get the same name. - */ - -int register_netdevice(struct net_device *dev) -{ - struct hlist_head *head; - struct hlist_node *p; - int ret; - struct net *net; - - BUG_ON(dev_boot_phase); - ASSERT_RTNL(); - - might_sleep(); - - /* When net_device's are persistent, this will be fatal. */ - BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev_net(dev)); - net = dev_net(dev); - - spin_lock_init(&dev->addr_list_lock); - netdev_set_addr_lockdep_class(dev); - netdev_init_queue_locks(dev); - - dev->iflink = -1; - - /* Init, if this function is available */ - if (dev->init) { - ret = dev->init(dev); - if (ret) { - if (ret > 0) - ret = -EIO; - goto out; - } - } - - if (!dev_valid_name(dev->name)) { - ret = -EINVAL; - goto err_uninit; - } - - dev->ifindex = dev_new_index(net); - if (dev->iflink == -1) - dev->iflink = dev->ifindex; - - /* Check for existence of name */ - head = dev_name_hash(net, dev->name); - hlist_for_each(p, head) { - struct net_device *d - = hlist_entry(p, struct net_device, name_hlist); - if (!strncmp(d->name, dev->name, IFNAMSIZ)) { - ret = -EEXIST; - goto err_uninit; - } - } - - /* Fix illegal checksum combinations */ - if ((dev->features & NETIF_F_HW_CSUM) && - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); - } - - if ((dev->features & NETIF_F_NO_CSUM) && - (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); - } - - - /* Fix illegal SG+CSUM combinations. */ - if ((dev->features & NETIF_F_SG) && - !(dev->features & NETIF_F_ALL_CSUM)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", - dev->name); - dev->features &= ~NETIF_F_SG; - } - - /* TSO requires that SG is present as well. 
*/ - if ((dev->features & NETIF_F_TSO) && - !(dev->features & NETIF_F_SG)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_TSO; - } - if (dev->features & NETIF_F_UFO) { - if (!(dev->features & NETIF_F_GEN_CSUM)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_HW_CSUM feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - if (!(dev->features & NETIF_F_SG)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - } - - /* Enable software GSO if SG is supported. */ - if (dev->features & NETIF_F_SG) - dev->features |= NETIF_F_GSO; - - netdev_initialize_kobject(dev); - ret = netdev_register_kobject(dev); - if (ret) - goto err_uninit; - dev->reg_state = NETREG_REGISTERED; - - /* - * Default initial state at registry is that the - * device is present. - */ - - set_bit(__LINK_STATE_PRESENT, &dev->state); - - dev_init_scheduler(dev); - dev_hold(dev); - list_netdevice(dev); - - /* Notify protocols, that a new device appeared. */ - ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); - ret = notifier_to_errno(ret); - if (ret) { - rollback_registered(dev); - dev->reg_state = NETREG_UNREGISTERED; - } - -out: - return ret; - -err_uninit: - if (dev->uninit) - dev->uninit(dev); - goto out; -} - -/** - * register_netdev - register a network device - * @dev: device to register - * - * Take a completed network device structure and add it to the kernel - * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier - * chain. 0 is returned on success. A negative errno code is returned - * on a failure to set up the device, or if the name is a duplicate. - * - * This is a wrapper around register_netdevice that takes the rtnl semaphore - * and expands the device name if you passed a format string to - * alloc_netdev. - */ -int register_netdev(struct net_device *dev) -{ - int err; - - rtnl_lock(); - - /* - * If the name is a format string the caller wants us to do a - * name allocation. - */ - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto out; - } - - err = register_netdevice(dev); -out: - rtnl_unlock(); - return err; -} -EXPORT_SYMBOL(register_netdev); - -/* - * netdev_wait_allrefs - wait until all references are gone. - * - * This is called when unregistering network devices. - * - * Any protocol or device that holds a reference should register - * for netdevice notification, and cleanup and put back the - * reference if they receive an UNREGISTER event. - * We can get stuck here if buggy protocols don't correctly - * call dev_put. - */ -static void netdev_wait_allrefs(struct net_device *dev) -{ - unsigned long rebroadcast_time, warning_time; - - rebroadcast_time = warning_time = jiffies; - while (atomic_read(&dev->refcnt) != 0) { - if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { - rtnl_lock(); - - /* Rebroadcast unregister notification */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - if (test_bit(__LINK_STATE_LINKWATCH_PENDING, - &dev->state)) { - /* We must not have linkwatch events - * pending on unregister. If this - * happens, we simply run the queue - * unscheduled, resulting in a noop - * for this device. - */ - linkwatch_run_queue(); - } - - __rtnl_unlock(); - - rebroadcast_time = jiffies; - } - - msleep(250); - - if (time_after(jiffies, warning_time + 10 * HZ)) { - printk(KERN_EMERG "unregister_netdevice: " - "waiting for %s to become free. 
Usage " - "count = %d\n", - dev->name, atomic_read(&dev->refcnt)); - warning_time = jiffies; - } - } -} - -/* The sequence is: - * - * rtnl_lock(); - * ... - * register_netdevice(x1); - * register_netdevice(x2); - * ... - * unregister_netdevice(y1); - * unregister_netdevice(y2); - * ... - * rtnl_unlock(); - * free_netdev(y1); - * free_netdev(y2); - * - * We are invoked by rtnl_unlock(). - * This allows us to deal with problems: - * 1) We can delete sysfs objects which invoke hotplug - * without deadlocking with linkwatch via keventd. - * 2) Since we run with the RTNL semaphore not held, we can sleep - * safely in order to wait for the netdev refcnt to drop to zero. - * - * We must not return until all unregister events added during - * the interval the lock was held have been completed. - */ -void netdev_run_todo(void) -{ - struct list_head list; - - /* Snapshot list, allow later requests */ - list_replace_init(&net_todo_list, &list); - - __rtnl_unlock(); - - while (!list_empty(&list)) { - struct net_device *dev - = list_entry(list.next, struct net_device, todo_list); - list_del(&dev->todo_list); - - if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { - printk(KERN_ERR "network todo '%s' but state %d\n", - dev->name, dev->reg_state); - dump_stack(); - continue; - } - - dev->reg_state = NETREG_UNREGISTERED; - - on_each_cpu(flush_backlog, dev, 1); - - netdev_wait_allrefs(dev); - - /* paranoia */ - BUG_ON(atomic_read(&dev->refcnt)); - WARN_ON(dev->ip_ptr); - WARN_ON(dev->ip6_ptr); - WARN_ON(dev->dn_ptr); - - if (dev->destructor) - dev->destructor(dev); - - /* Free network device */ - kobject_put(&dev->dev.kobj); - } -} - -static struct net_device_stats *internal_stats(struct net_device *dev) -{ - return &dev->stats; -} - -static void netdev_init_one_queue(struct net_device *dev, - struct netdev_queue *queue, - void *_unused) -{ - queue->dev = dev; -} - -static void netdev_init_queues(struct net_device *dev) -{ - netdev_init_one_queue(dev, &dev->rx_queue, NULL); - netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); - spin_lock_init(&dev->tx_global_lock); -} - -/** - * alloc_netdev_mq - allocate network device - * @sizeof_priv: size of private data to allocate space for - * @name: device name format string - * @setup: callback to initialize device - * @queue_count: the number of subqueues to allocate - * - * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. Also allocates subquue structs - * for each queue on the device at the end of the netdevice. 
- */ -struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), unsigned int queue_count) -{ - struct netdev_queue *tx; - struct net_device *dev; - size_t alloc_size; - void *p; - - BUG_ON(strlen(name) >= sizeof(dev->name)); - - alloc_size = sizeof(struct net_device); - if (sizeof_priv) { - /* ensure 32-byte alignment of private area */ - alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; - alloc_size += sizeof_priv; - } - /* ensure 32-byte alignment of whole construct */ - alloc_size += NETDEV_ALIGN_CONST; - - p = kzalloc(alloc_size, GFP_KERNEL); - if (!p) { - printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); - return NULL; - } - - tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) { - printk(KERN_ERR "alloc_netdev: Unable to allocate " - "tx qdiscs.\n"); - kfree(p); - return NULL; - } - - dev = (struct net_device *) - (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); - dev->padded = (char *)dev - (char *)p; - dev_net_set(dev, &init_net); - - dev->_tx = tx; - dev->num_tx_queues = queue_count; - dev->real_num_tx_queues = queue_count; - - if (sizeof_priv) { - dev->priv = ((char *)dev + - ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST)); - } - - dev->gso_max_size = GSO_MAX_SIZE; - - netdev_init_queues(dev); - - dev->get_stats = internal_stats; - netpoll_netdev_init(dev); - setup(dev); - strcpy(dev->name, name); - return dev; -} -EXPORT_SYMBOL(alloc_netdev_mq); - -/** - * free_netdev - free network device - * @dev: device - * - * This function does the last stage of destroying an allocated device - * interface. The reference to the device object is released. - * If this is the last reference then it will be freed. - */ -void free_netdev(struct net_device *dev) -{ - release_net(dev_net(dev)); - - kfree(dev->_tx); - - /* Compatibility with error handling in drivers */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); - return; - } - - BUG_ON(dev->reg_state != NETREG_UNREGISTERED); - dev->reg_state = NETREG_RELEASED; - - /* will free via device release */ - put_device(&dev->dev); -} - -/* Synchronize with packet receive processing. */ -void synchronize_net(void) -{ - might_sleep(); - synchronize_rcu(); -} - -/** - * unregister_netdevice - remove device from the kernel - * @dev: device - * - * This function shuts down a device interface and removes it - * from the kernel tables. - * - * Callers must hold the rtnl semaphore. You may want - * unregister_netdev() instead of this. - */ - -void unregister_netdevice(struct net_device *dev) -{ - ASSERT_RTNL(); - - rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); -} - -/** - * unregister_netdev - remove device from the kernel - * @dev: device - * - * This function shuts down a device interface and removes it - * from the kernel tables. - * - * This is just a wrapper for unregister_netdevice that takes - * the rtnl semaphore. In general you want to use this and not - * unregister_netdevice. - */ -void unregister_netdev(struct net_device *dev) -{ - rtnl_lock(); - unregister_netdevice(dev); - rtnl_unlock(); -} - -EXPORT_SYMBOL(unregister_netdev); - -/** - * dev_change_net_namespace - move device to different nethost namespace - * @dev: device - * @net: network namespace - * @pat: If not NULL name pattern to try if the current device name - * is already taken in the destination network namespace. 
- * - * This function shuts down a device interface and moves it - * to a new network namespace. On success 0 is returned, on - * a failure a netagive errno code is returned. - * - * Callers must hold the rtnl semaphore. - */ - -int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) -{ - char buf[IFNAMSIZ]; - const char *destname; - int err; - - ASSERT_RTNL(); - - /* Don't allow namespace local devices to be moved. */ - err = -EINVAL; - if (dev->features & NETIF_F_NETNS_LOCAL) - goto out; - -#ifdef CONFIG_SYSFS - /* Don't allow real devices to be moved when sysfs - * is enabled. - */ - err = -EINVAL; - if (dev->dev.parent) - goto out; -#endif - - /* Ensure the device has been registrered */ - err = -EINVAL; - if (dev->reg_state != NETREG_REGISTERED) - goto out; - - /* Get out if there is nothing todo */ - err = 0; - if (net_eq(dev_net(dev), net)) - goto out; - - /* Pick the destination device name, and ensure - * we can use it in the destination network namespace. - */ - err = -EEXIST; - destname = dev->name; - if (__dev_get_by_name(net, destname)) { - /* We get here if we can't use the current device name */ - if (!pat) - goto out; - if (!dev_valid_name(pat)) - goto out; - if (strchr(pat, '%')) { - if (__dev_alloc_name(net, pat, buf) < 0) - goto out; - destname = buf; - } else - destname = pat; - if (__dev_get_by_name(net, destname)) - goto out; - } - - /* - * And now a mini version of register_netdevice unregister_netdevice. - */ - - /* If device is running close it first. */ - dev_close(dev); - - /* And unlink it from device chain */ - err = -ENODEV; - unlist_netdevice(dev); - - synchronize_net(); - - /* Shutdown queueing discipline. */ - dev_shutdown(dev); - - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - /* - * Flush the unicast and multicast chains - */ - dev_addr_discard(dev); - - netdev_unregister_kobject(dev); - - /* Actually switch the network namespace */ - dev_net_set(dev, net); - - /* Assign the new device name */ - if (destname != dev->name) - strcpy(dev->name, destname); - - /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev->iflink == dev->ifindex); - dev->ifindex = dev_new_index(net); - if (iflink) - dev->iflink = dev->ifindex; - } - - /* Fixup kobjects */ - err = netdev_register_kobject(dev); - WARN_ON(err); - - /* Add the device back in the hashes */ - list_netdevice(dev); - - /* Notify protocols, that a new device appeared. */ - call_netdevice_notifiers(NETDEV_REGISTER, dev); - - synchronize_net(); - err = 0; -out: - return err; -} - -static int dev_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *ocpu) -{ - struct sk_buff **list_skb; - struct Qdisc **list_net; - struct sk_buff *skb; - unsigned int cpu, oldcpu = (unsigned long)ocpu; - struct softnet_data *sd, *oldsd; - - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) - return NOTIFY_OK; - - local_irq_disable(); - cpu = smp_processor_id(); - sd = &per_cpu(softnet_data, cpu); - oldsd = &per_cpu(softnet_data, oldcpu); - - /* Find end of our completion_queue. */ - list_skb = &sd->completion_queue; - while (*list_skb) - list_skb = &(*list_skb)->next; - /* Append completion queue from offline CPU. */ - *list_skb = oldsd->completion_queue; - oldsd->completion_queue = NULL; - - /* Find end of our output_queue. 
*/ - list_net = &sd->output_queue; - while (*list_net) - list_net = &(*list_net)->next_sched; - /* Append output queue from offline CPU. */ - *list_net = oldsd->output_queue; - oldsd->output_queue = NULL; - - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_enable(); - - /* Process offline CPU's input_pkt_queue */ - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) - netif_rx(skb); - - return NOTIFY_OK; -} - -#ifdef CONFIG_NET_DMA -/** - * net_dma_rebalance - try to maintain one DMA channel per CPU - * @net_dma: DMA client and associated data (lock, channels, channel_mask) - * - * This is called when the number of channels allocated to the net_dma client - * changes. The net_dma client tries to have one DMA channel per CPU. - */ - -static void net_dma_rebalance(struct net_dma *net_dma) -{ - unsigned int cpu, i, n, chan_idx; - struct dma_chan *chan; - - if (cpus_empty(net_dma->channel_mask)) { - for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); - return; - } - - i = 0; - cpu = first_cpu(cpu_online_map); - - for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { - chan = net_dma->channels[chan_idx]; - - n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) - + (i < (num_online_cpus() % - cpus_weight(net_dma->channel_mask)) ? 1 : 0)); - - while(n) { - per_cpu(softnet_data, cpu).net_dma = chan; - cpu = next_cpu(cpu, cpu_online_map); - n--; - } - i++; - } -} - -/** - * netdev_dma_event - event callback for the net_dma_client - * @client: should always be net_dma_client - * @chan: DMA channel for the event - * @state: DMA state to be handled - */ -static enum dma_state_client -netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_state state) -{ - int i, found = 0, pos = -1; - struct net_dma *net_dma = - container_of(client, struct net_dma, client); - enum dma_state_client ack = DMA_DUP; /* default: take no action */ - - spin_lock(&net_dma->lock); - switch (state) { - case DMA_RESOURCE_AVAILABLE: - for (i = 0; i < nr_cpu_ids; i++) - if (net_dma->channels[i] == chan) { - found = 1; - break; - } else if (net_dma->channels[i] == NULL && pos < 0) - pos = i; - - if (!found && pos >= 0) { - ack = DMA_ACK; - net_dma->channels[pos] = chan; - cpu_set(pos, net_dma->channel_mask); - net_dma_rebalance(net_dma); - } - break; - case DMA_RESOURCE_REMOVED: - for (i = 0; i < nr_cpu_ids; i++) - if (net_dma->channels[i] == chan) { - found = 1; - pos = i; - break; - } - - if (found) { - ack = DMA_ACK; - cpu_clear(pos, net_dma->channel_mask); - net_dma->channels[i] = NULL; - net_dma_rebalance(net_dma); - } - break; - default: - break; - } - spin_unlock(&net_dma->lock); - - return ack; -} - -/** - * netdev_dma_regiser - register the networking subsystem as a DMA client - */ -static int __init netdev_dma_register(void) -{ - net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma), - GFP_KERNEL); - if (unlikely(!net_dma.channels)) { - printk(KERN_NOTICE - "netdev_dma: no memory for net_dma.channels\n"); - return -ENOMEM; - } - spin_lock_init(&net_dma.lock); - dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); - dma_async_client_register(&net_dma.client); - dma_async_client_chan_request(&net_dma.client); - return 0; -} - -#else -static int __init netdev_dma_register(void) { return -ENODEV; } -#endif /* CONFIG_NET_DMA */ - -/** - * netdev_compute_feature - compute conjunction of two feature sets - * @all: first feature set - * @one: second feature set - * - * Computes a new feature set after adding a device with feature set - * @one to the 
master device with current feature set @all. Returns - * the new feature set. - */ -int netdev_compute_features(unsigned long all, unsigned long one) -{ - /* if device needs checksumming, downgrade to hw checksumming */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - - /* if device can't do all checksum, downgrade to ipv4/ipv6 */ - if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) - all ^= NETIF_F_HW_CSUM - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - - if (one & NETIF_F_GSO) - one |= NETIF_F_GSO_SOFTWARE; - one |= NETIF_F_GSO; - - /* If even one device supports robust GSO, enable it for all. */ - if (one & NETIF_F_GSO_ROBUST) - all |= NETIF_F_GSO_ROBUST; - - all &= one | NETIF_F_LLTX; - - if (!(all & NETIF_F_ALL_CSUM)) - all &= ~NETIF_F_SG; - if (!(all & NETIF_F_SG)) - all &= ~NETIF_F_GSO_MASK; - - return all; -} -EXPORT_SYMBOL(netdev_compute_features); - -static struct hlist_head *netdev_create_hash(void) -{ - int i; - struct hlist_head *hash; - - hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); - if (hash != NULL) - for (i = 0; i < NETDEV_HASHENTRIES; i++) - INIT_HLIST_HEAD(&hash[i]); - - return hash; -} - -/* Initialize per network namespace state */ -static int __net_init netdev_init(struct net *net) -{ - INIT_LIST_HEAD(&net->dev_base_head); - - net->dev_name_head = netdev_create_hash(); - if (net->dev_name_head == NULL) - goto err_name; - - net->dev_index_head = netdev_create_hash(); - if (net->dev_index_head == NULL) - goto err_idx; - - return 0; - -err_idx: - kfree(net->dev_name_head); -err_name: - return -ENOMEM; -} - -char *netdev_drivername(struct net_device *dev, char *buffer, int len) -{ - struct device_driver *driver; - struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; - - parent = dev->dev.parent; - - if (!parent) - return buffer; - - driver = parent->driver; - if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; -} - -static void __net_exit netdev_exit(struct net *net) -{ - kfree(net->dev_name_head); - kfree(net->dev_index_head); -} - -static struct pernet_operations __net_initdata netdev_net_ops = { - .init = netdev_init, - .exit = netdev_exit, -}; - -static void __net_exit default_device_exit(struct net *net) -{ - struct net_device *dev, *next; - /* - * Push all migratable of the network devices back to the - * initial network namespace - */ - rtnl_lock(); - for_each_netdev_safe(net, dev, next) { - int err; - char fb_name[IFNAMSIZ]; - - /* Ignore unmoveable devices (i.e. loopback) */ - if (dev->features & NETIF_F_NETNS_LOCAL) - continue; - - /* Push remaing network devices to init_net */ - snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); - err = dev_change_net_namespace(dev, &init_net, fb_name); - if (err) { - printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n", - __func__, dev->name, err); - BUG(); - } - } - rtnl_unlock(); -} - -static struct pernet_operations __net_initdata default_device_ops = { - .exit = default_device_exit, -}; - -/* - * Initialize the DEV module. At boot time this walks the device list and - * unhooks any devices that fail to initialise (normally hardware not - * present) and leaves us with a valid list of present and active devices. - * - */ - -/* - * This is called single threaded during boot, so no need - * to take the rtnl semaphore. 
- */ -static int __init net_dev_init(void) -{ - int i, rc = -ENOMEM; - - BUG_ON(!dev_boot_phase); - - if (dev_proc_init()) - goto out; - - if (netdev_kobject_init()) - goto out; - - INIT_LIST_HEAD(&ptype_all); - for (i = 0; i < PTYPE_HASH_SIZE; i++) - INIT_LIST_HEAD(&ptype_base[i]); - - if (register_pernet_subsys(&netdev_net_ops)) - goto out; - - if (register_pernet_device(&default_device_ops)) - goto out; - - /* - * Initialise the packet receive queues. - */ - - for_each_possible_cpu(i) { - struct softnet_data *queue; - - queue = &per_cpu(softnet_data, i); - skb_queue_head_init(&queue->input_pkt_queue); - queue->completion_queue = NULL; - INIT_LIST_HEAD(&queue->poll_list); - - queue->backlog.poll = process_backlog; - queue->backlog.weight = weight_p; - } - - netdev_dma_register(); - - dev_boot_phase = 0; - - open_softirq(NET_TX_SOFTIRQ, net_tx_action); - open_softirq(NET_RX_SOFTIRQ, net_rx_action); - - hotcpu_notifier(dev_cpu_callback, 0); - dst_init(); - dev_mcast_init(); - rc = 0; -out: - return rc; -} - -subsys_initcall(net_dev_init); - -EXPORT_SYMBOL(__dev_get_by_index); -EXPORT_SYMBOL(__dev_get_by_name); -EXPORT_SYMBOL(__dev_remove_pack); -EXPORT_SYMBOL(dev_valid_name); -EXPORT_SYMBOL(dev_add_pack); -EXPORT_SYMBOL(dev_alloc_name); -EXPORT_SYMBOL(dev_close); -EXPORT_SYMBOL(dev_get_by_flags); -EXPORT_SYMBOL(dev_get_by_index); -EXPORT_SYMBOL(dev_get_by_name); -EXPORT_SYMBOL(dev_open); -EXPORT_SYMBOL(dev_queue_xmit); -EXPORT_SYMBOL(dev_remove_pack); -EXPORT_SYMBOL(dev_set_allmulti); -EXPORT_SYMBOL(dev_set_promiscuity); -EXPORT_SYMBOL(dev_change_flags); -EXPORT_SYMBOL(dev_set_mtu); -EXPORT_SYMBOL(dev_set_mac_address); -EXPORT_SYMBOL(free_netdev); -EXPORT_SYMBOL(netdev_boot_setup_check); -EXPORT_SYMBOL(netdev_set_master); -EXPORT_SYMBOL(netdev_state_change); -EXPORT_SYMBOL(netif_receive_skb); -EXPORT_SYMBOL(netif_rx); -EXPORT_SYMBOL(register_gifconf); -EXPORT_SYMBOL(register_netdevice); -EXPORT_SYMBOL(register_netdevice_notifier); -EXPORT_SYMBOL(skb_checksum_help); -EXPORT_SYMBOL(synchronize_net); -EXPORT_SYMBOL(unregister_netdevice); -EXPORT_SYMBOL(unregister_netdevice_notifier); -EXPORT_SYMBOL(net_enable_timestamp); -EXPORT_SYMBOL(net_disable_timestamp); -EXPORT_SYMBOL(dev_get_flags); -EXPORT_PER_CPU_SYMBOL(sknid_elevator); - -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) -EXPORT_SYMBOL(br_handle_frame_hook); -EXPORT_SYMBOL(br_fdb_get_hook); -EXPORT_SYMBOL(br_fdb_put_hook); -#endif - -#ifdef CONFIG_KMOD -EXPORT_SYMBOL(dev_load); -#endif - -EXPORT_PER_CPU_SYMBOL(softnet_data); diff -Nurb linux-2.6.27-720/net/core/dev.c.rej linux-2.6.27-710/net/core/dev.c.rej --- linux-2.6.27-720/net/core/dev.c.rej 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/net/core/dev.c.rej 1969-12-31 19:00:00.000000000 -0500 @@ -1,30 +0,0 @@ -*************** -*** 2187,2199 **** - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -- int netif_receive_skb(struct sk_buff *skb) - { - struct packet_type *ptype, *pt_prev; - struct net_device *orig_dev; - struct net_device *null_or_orig; - int ret = NET_RX_DROP; -- __be16 type; - - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) ---- 2215,2228 ---- - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -+ //int netif_receive_skb(struct sk_buff *skb) -+ int __netif_receive_skb(struct sk_buff *skb, unsigned short type, int notifier_data) - { - struct packet_type *ptype, *pt_prev; - struct net_device *orig_dev; - struct net_device *null_or_orig; - int ret = NET_RX_DROP; 
-+ // __be16 type; - - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) diff -Nurb linux-2.6.27-720/net/core/neighbour.c linux-2.6.27-710/net/core/neighbour.c --- linux-2.6.27-720/net/core/neighbour.c 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/net/core/neighbour.c 2008-10-09 18:13:53.000000000 -0400 @@ -2702,7 +2702,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler_t *handler, ctl_handler *strategy) + proc_handler *handler, ctl_handler *strategy) { struct neigh_sysctl_table *t; const char *dev_name_source = NULL; diff -Nurb linux-2.6.27-720/net/core/skbuff.c linux-2.6.27-710/net/core/skbuff.c --- linux-2.6.27-720/net/core/skbuff.c 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/net/core/skbuff.c 2009-05-04 12:15:31.000000000 -0400 @@ -575,112 +575,6 @@ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } - -/* Click: clear skb header state */ -static inline void skb_headerinit(void *p, struct kmem_cache *cache, - unsigned long flags) -{ - struct sk_buff *skb = p; - - skb->next = NULL; - skb->prev = NULL; - skb->sk = NULL; - skb->tstamp.tv64 = 0; /* No idea about time */ - skb->dev = NULL; - skb->iif = 0; - skb->dst = NULL; - skb->sp = NULL; - memset(skb->cb, 0, sizeof(skb->cb)); - skb->priority = 0; - skb->pkt_type = PACKET_HOST; /* Default type */ - skb->ip_summed = 0; - skb->destructor = NULL; - -#ifdef CONFIG_NETFILTER - skb->mark = 0; - skb->nfct = NULL; -# if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - skb->nfct_reasm = NULL; -# endif -# ifdef CONFIG_BRIDGE_NETFILTER - skb->nf_bridge = NULL; -# endif -#endif -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -# ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -# endif -#endif -} - -/* Click: attempts to recycle a sk_buff. if it can be recycled, return it */ -struct sk_buff *skb_recycle(struct sk_buff *skb) -{ - if (atomic_dec_and_test(&skb->users)) { - dst_release(skb->dst); -#ifdef CONFIG_XFRM - secpath_put(skb->sp); -#endif - if(skb->destructor) { - WARN_ON(in_irq()); - skb->destructor(skb); - } -#ifdef CONFIG_NETFILTER - nf_conntrack_put(skb->nfct); -# if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put_reasm(skb->nfct_reasm); -# endif -# ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(skb->nf_bridge); -# endif -#endif - skb_headerinit(skb, NULL, 0); - - if (skb->fclone == SKB_FCLONE_UNAVAILABLE - && (!skb->cloned || - atomic_read(&skb_shinfo(skb)->dataref) == (skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1))) { - /* Don't need to atomic_sub skb_shinfo(skb)->dataref, - as we set that to 1 below. */ - - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - /* Jason Park patch */ - skb_shinfo(skb)->nr_frags = 0; - } - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - - /* Load the data pointers. 
*/ - skb->data = skb->head; - skb->tail = skb->data; - /* end and truesize should have never changed */ - /* skb->end = skb->data + skb->truesize; */ - - /* set up other state */ - skb->len = 0; - skb->cloned = 0; - - atomic_set(&skb->users, 1); - atomic_set(&(skb_shinfo(skb)->dataref), 1); - /* Jason Park patch */ - skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_segs = 0; - skb_shinfo(skb)->gso_type = 0; - skb_shinfo(skb)->ip6_frag_id = 0; - - return skb; - } - - kfree_skbmem(skb); - } - - return 0; -} - /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy @@ -2710,7 +2604,6 @@ EXPORT_SYMBOL(skb_append_datato_frags); EXPORT_SYMBOL(__skb_warn_lro_forwarding); -EXPORT_SYMBOL(skb_recycle); EXPORT_SYMBOL_GPL(skb_to_sgvec); EXPORT_SYMBOL_GPL(skb_cow_data); EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff -Nurb linux-2.6.27-720/net/core/skbuff.c.orig linux-2.6.27-710/net/core/skbuff.c.orig --- linux-2.6.27-720/net/core/skbuff.c.orig 2009-05-04 12:15:31.000000000 -0400 +++ linux-2.6.27-710/net/core/skbuff.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,2609 +0,0 @@ -/* - * Routines having to do with the 'struct sk_buff' memory handlers. - * - * Authors: Alan Cox - * Florian La Roche - * - * Fixes: - * Alan Cox : Fixed the worst of the load - * balancer bugs. - * Dave Platt : Interrupt stacking fix. - * Richard Kooijman : Timestamp fixes. - * Alan Cox : Changed buffer format. - * Alan Cox : destructor hook for AF_UNIX etc. - * Linus Torvalds : Better skb_clone. - * Alan Cox : Added skb_copy. - * Alan Cox : Added all the changed routines Linus - * only put in the headers - * Ray VanTassle : Fixed --skb->lock in free - * Alan Cox : skb_copy copy arp field - * Andi Kleen : slabified it. - * Robert Olsson : Removed skb_head_pool - * - * NOTE: - * The __skb_ routines should be called with interrupts - * disabled, or you better be *real* sure that the operation is atomic - * with respect to whatever list is being frobbed (e.g. via lock_sock() - * or via disabling bottom half handlers, etc). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * The functions in this file will not compile correctly with gcc 2.4.x - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_NET_CLS_ACT -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "kmap_skb.h" - -static struct kmem_cache *skbuff_head_cache __read_mostly; -static struct kmem_cache *skbuff_fclone_cache __read_mostly; - -static void sock_pipe_buf_release(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - struct sk_buff *skb = (struct sk_buff *) buf->private; - - kfree_skb(skb); -} - -static void sock_pipe_buf_get(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - struct sk_buff *skb = (struct sk_buff *) buf->private; - - skb_get(skb); -} - -static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return 1; -} - - -/* Pipe buffer operations for a socket. 
*/ -static struct pipe_buf_operations sock_pipe_buf_ops = { - .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, - .confirm = generic_pipe_buf_confirm, - .release = sock_pipe_buf_release, - .steal = sock_pipe_buf_steal, - .get = sock_pipe_buf_get, -}; - -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - -/** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. - */ -void skb_over_panic(struct sk_buff *skb, int sz, void *here) -{ - printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : ""); - BUG(); -} - -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -void skb_under_panic(struct sk_buff *skb, int sz, void *here) -{ - printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : ""); - BUG(); -} - -void skb_truesize_bug(struct sk_buff *skb) -{ - printk(KERN_ERR "SKB BUG: Invalid truesize (%u) " - "len=%u, sizeof(sk_buff)=%Zd\n", - skb->truesize, skb->len, sizeof(struct sk_buff)); -} -EXPORT_SYMBOL(skb_truesize_bug); - -/* Allocate a new skbuff. We do this ourselves so we can fill in a few - * 'private' fields and also do memory statistics to find all the - * [BEEP] leaks. - * - */ - -/** - * __alloc_skb - allocate a network buffer - * @size: size to allocate - * @gfp_mask: allocation mask - * @fclone: allocate from fclone cache instead of head cache - * and allocate a cloned (child) skb - * @node: numa node to allocate memory on - * - * Allocate a new &sk_buff. The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. - */ -struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone, int node) -{ - struct kmem_cache *cache; - struct skb_shared_info *shinfo; - struct sk_buff *skb; - u8 *data; - - cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; - - /* Get the HEAD */ - skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); - if (!skb) - goto out; - - size = SKB_DATA_ALIGN(size); - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask, node); - if (!data) - goto nodata; - - /* - * Only clear those fields we need to clear, not those that we will - * actually initialise below. Hence, don't put any more fields after - * the tail pointer in struct sk_buff! 
- */ - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->truesize = size + sizeof(struct sk_buff); - atomic_set(&skb->users, 1); - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - if (!in_interrupt()) skb->skb_tag = nx_current_nid(); else skb->skb_tag = 0; - /* make sure we initialize shinfo sequentially */ - shinfo = skb_shinfo(skb); - atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->frag_list = NULL; - - if (fclone) { - struct sk_buff *child = skb + 1; - atomic_t *fclone_ref = (atomic_t *) (child + 1); - - skb->fclone = SKB_FCLONE_ORIG; - atomic_set(fclone_ref, 1); - - child->fclone = SKB_FCLONE_UNAVAILABLE; - } -out: - return skb; -nodata: - kmem_cache_free(cache, skb); - skb = NULL; - goto out; -} - -/** - * __netdev_alloc_skb - allocate an skbuff for rx on a specific device - * @dev: network device to receive on - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. - */ -struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask) -{ - int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; - struct sk_buff *skb; - - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = dev; - } - return skb; -} - -/** - * dev_alloc_skb - allocate an skbuff for receiving - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. Although this function - * allocates memory it can be called from an interrupt. - */ -struct sk_buff *dev_alloc_skb(unsigned int length) -{ - /* - * There is more code here than it seems: - * __dev_alloc_skb is an inline - */ - return __dev_alloc_skb(length, GFP_ATOMIC); -} -EXPORT_SYMBOL(dev_alloc_skb); - -static void skb_drop_list(struct sk_buff **listp) -{ - struct sk_buff *list = *listp; - - *listp = NULL; - - do { - struct sk_buff *this = list; - list = list->next; - kfree_skb(this); - } while (list); -} - -static inline void skb_drop_fraglist(struct sk_buff *skb) -{ - skb_drop_list(&skb_shinfo(skb)->frag_list); -} - -static void skb_clone_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list = list->next) - skb_get(list); -} - -static void skb_release_data(struct sk_buff *skb) -{ - if (!skb->cloned || - !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, - &skb_shinfo(skb)->dataref)) { - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - } - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - - kfree(skb->head); - } -} - -/* - * Free an skbuff by memory without cleaning the state. 
- */ -static void kfree_skbmem(struct sk_buff *skb) -{ - struct sk_buff *other; - atomic_t *fclone_ref; - - switch (skb->fclone) { - case SKB_FCLONE_UNAVAILABLE: - kmem_cache_free(skbuff_head_cache, skb); - break; - - case SKB_FCLONE_ORIG: - fclone_ref = (atomic_t *) (skb + 2); - if (atomic_dec_and_test(fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, skb); - break; - - case SKB_FCLONE_CLONE: - fclone_ref = (atomic_t *) (skb + 1); - other = skb - 1; - - /* The clone portion is available for - * fast-cloning again. - */ - skb->fclone = SKB_FCLONE_UNAVAILABLE; - - if (atomic_dec_and_test(fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, other); - break; - } -} - -/* Free everything but the sk_buff shell. */ -static void skb_release_all(struct sk_buff *skb) -{ - dst_release(skb->dst); -#ifdef CONFIG_XFRM - secpath_put(skb->sp); -#endif - if (skb->destructor) { - WARN_ON(in_irq()); - skb->destructor(skb); - } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(skb->nfct); - nf_conntrack_put_reasm(skb->nfct_reasm); -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(skb->nf_bridge); -#endif -/* XXX: IS this still necessary? - JHS */ -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif -#endif - skb_release_data(skb); -} - -/** - * __kfree_skb - private function - * @skb: buffer - * - * Free an sk_buff. Release anything attached to the buffer. - * Clean the state. This is an internal helper function. Users should - * always call kfree_skb - */ - -void __kfree_skb(struct sk_buff *skb) -{ - skb_release_all(skb); - kfree_skbmem(skb); -} - -/** - * kfree_skb - free an sk_buff - * @skb: buffer to free - * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. - */ -void kfree_skb(struct sk_buff *skb) -{ - if (unlikely(!skb)) - return; - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) - return; - __kfree_skb(skb); -} - -static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ - new->tstamp = old->tstamp; - new->dev = old->dev; - new->transport_header = old->transport_header; - new->network_header = old->network_header; - new->mac_header = old->mac_header; - new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET - new->sp = secpath_get(old->sp); -#endif - memcpy(new->cb, old->cb, sizeof(old->cb)); - new->csum_start = old->csum_start; - new->csum_offset = old->csum_offset; - new->local_df = old->local_df; - new->pkt_type = old->pkt_type; - new->ip_summed = old->ip_summed; - skb_copy_queue_mapping(new, old); - new->priority = old->priority; -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - new->ipvs_property = old->ipvs_property; -#endif - new->protocol = old->protocol; - new->mark = old->mark; - __nf_copy(new, old); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - new->nf_trace = old->nf_trace; -#endif -#ifdef CONFIG_NET_SCHED - new->tc_index = old->tc_index; -#ifdef CONFIG_NET_CLS_ACT - new->tc_verd = old->tc_verd; -#endif -#endif - new->vlan_tci = old->vlan_tci; - new->skb_tag = old->skb_tag; - - skb_copy_secmark(new, old); -} - -static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) -{ -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->sk = NULL; - __copy_skb_header(n, skb); - - C(len); - C(data_len); - C(mac_len); - n->hdr_len = skb->nohdr ? 
skb_headroom(skb) : skb->hdr_len; - n->cloned = 1; - n->nohdr = 0; - n->destructor = NULL; - C(iif); - C(tail); - C(end); - C(head); - C(data); - C(truesize); -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - C(do_not_encrypt); -#endif - atomic_set(&n->users, 1); - - /* Sapan: Cloned skbs aren't owned by anyone. Let the cloner decide who it belongs to. */ - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; - - return n; -#undef C -} - -/** - * skb_morph - morph one skb into another - * @dst: the skb to receive the contents - * @src: the skb to supply the contents - * - * This is identical to skb_clone except that the target skb is - * supplied by the user. - * - * The target skb is returned upon exit. - */ -struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) -{ - skb_release_all(dst); - return __skb_clone(dst, src); -} -EXPORT_SYMBOL_GPL(skb_morph); - -/** - * skb_clone - duplicate an sk_buff - * @skb: buffer to clone - * @gfp_mask: allocation priority - * - * Duplicate an &sk_buff. The new one is not owned by a socket. Both - * copies share the same packet data but not structure. The new - * buffer has a reference count of 1. If the allocation fails the - * function returns %NULL otherwise the new buffer is returned. - * - * If this function is called from an interrupt gfp_mask() must be - * %GFP_ATOMIC. - */ - -struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) -{ - struct sk_buff *n; - - n = skb + 1; - if (skb->fclone == SKB_FCLONE_ORIG && - n->fclone == SKB_FCLONE_UNAVAILABLE) { - atomic_t *fclone_ref = (atomic_t *) (n + 1); - n->fclone = SKB_FCLONE_CLONE; - atomic_inc(fclone_ref); - } else { - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; - n->fclone = SKB_FCLONE_UNAVAILABLE; - } - - return __skb_clone(n, skb); -} - -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; -#endif - - __copy_skb_header(new, old); - -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* {transport,network,mac}_header are relative to skb->head */ - new->transport_header += offset; - new->network_header += offset; - new->mac_header += offset; -#endif - skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; - skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; - skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; -} - -/** - * skb_copy - create private copy of an sk_buff - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data. This is used when the - * caller wishes to modify the data and needs a private copy of the - * data to alter. Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * As by-product this function converts non-linear &sk_buff to linear - * one, so that &sk_buff becomes completely private and caller is allowed - * to modify all the data of returned buffer. This means that this - * function is not recommended for use in circumstances when only - * header is going to be modified. Use pskb_copy() instead. 
- */ - -struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) -{ - int headerlen = skb->data - skb->head; - /* - * Allocate the copy buffer - */ - struct sk_buff *n; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - n = alloc_skb(skb->end + skb->data_len, gfp_mask); -#else - n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); -#endif - if (!n) - return NULL; - - /* Set the data pointer */ - skb_reserve(n, headerlen); - /* Set the tail pointer and length */ - skb_put(n, skb->len); - - if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) - BUG(); - - copy_skb_header(n, skb); - return n; -} - - -/** - * pskb_copy - create copy of an sk_buff with private head. - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and part of its data, located - * in header. Fragmented data remain shared. This is used when - * the caller wishes to modify only header of &sk_buff and needs - * private copy of the header to alter. Returns %NULL on failure - * or the pointer to the buffer on success. - * The returned buffer has a reference count of 1. - */ - -struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) -{ - /* - * Allocate the copy buffer - */ - struct sk_buff *n; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - n = alloc_skb(skb->end, gfp_mask); -#else - n = alloc_skb(skb->end - skb->head, gfp_mask); -#endif - if (!n) - goto out; - - /* Set the data pointer */ - skb_reserve(n, skb->data - skb->head); - /* Set the tail pointer and length */ - skb_put(n, skb_headlen(skb)); - /* Copy the bytes */ - skb_copy_from_linear_data(skb, n->data, n->len); - - n->truesize += skb->data_len; - n->data_len = skb->data_len; - n->len = skb->len; - - if (skb_shinfo(skb)->nr_frags) { - int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; - } - - if (skb_shinfo(skb)->frag_list) { - skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; - skb_clone_fraglist(n); - } - - copy_skb_header(n, skb); -out: - return n; -} - -/** - * pskb_expand_head - reallocate header of &sk_buff - * @skb: buffer to reallocate - * @nhead: room to add at head - * @ntail: room to add at tail - * @gfp_mask: allocation priority - * - * Expands (or creates identical copy, if &nhead and &ntail are zero) - * header of skb. &sk_buff itself is not changed. &sk_buff MUST have - * reference count of 1. Returns zero in the case of success or error, - * if expansion failed. In the last case, &sk_buff is not changed. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - gfp_t gfp_mask) -{ - int i; - u8 *data; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - int size = nhead + skb->end + ntail; -#else - int size = nhead + (skb->end - skb->head) + ntail; -#endif - long off; - - if (skb_shared(skb)) - BUG(); - - size = SKB_DATA_ALIGN(size); - - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - goto nodata; - - /* Copy only real data... and, alas, header. This should be - * optimized for the cases when header is void. 
*/ -#ifdef NET_SKBUFF_DATA_USES_OFFSET - memcpy(data + nhead, skb->head, skb->tail); -#else - memcpy(data + nhead, skb->head, skb->tail - skb->head); -#endif - memcpy(data + size, skb_end_pointer(skb), - sizeof(struct skb_shared_info)); - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_clone_fraglist(skb); - - skb_release_data(skb); - - off = (data + nhead) - skb->head; - - skb->head = data; - skb->data += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; - off = nhead; -#else - skb->end = skb->head + size; -#endif - /* {transport,network,mac}_header and tail are relative to skb->head */ - skb->tail += off; - skb->transport_header += off; - skb->network_header += off; - skb->mac_header += off; - skb->csum_start += nhead; - skb->cloned = 0; - skb->hdr_len = 0; - skb->nohdr = 0; - atomic_set(&skb_shinfo(skb)->dataref, 1); - return 0; - -nodata: - return -ENOMEM; -} - -/* Make private copy of skb with writable head and some headroom */ - -struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) -{ - struct sk_buff *skb2; - int delta = headroom - skb_headroom(skb); - - if (delta <= 0) - skb2 = pskb_copy(skb, GFP_ATOMIC); - else { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, - GFP_ATOMIC)) { - kfree_skb(skb2); - skb2 = NULL; - } - } - return skb2; -} - - -/** - * skb_copy_expand - copy and expand sk_buff - * @skb: buffer to copy - * @newheadroom: new free bytes at head - * @newtailroom: new free bytes at tail - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data and while doing so - * allocate additional space. - * - * This is used when the caller wishes to modify the data and needs a - * private copy of the data to alter as well as more space for new fields. - * Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * You must pass %GFP_ATOMIC as the allocation priority if this function - * is called from an interrupt. - */ -struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, - gfp_t gfp_mask) -{ - /* - * Allocate the copy buffer - */ - struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); - int oldheadroom = skb_headroom(skb); - int head_copy_len, head_copy_off; - int off; - - if (!n) - return NULL; - - skb_reserve(n, newheadroom); - - /* Set the tail pointer and length */ - skb_put(n, skb->len); - - head_copy_len = oldheadroom; - head_copy_off = 0; - if (newheadroom <= head_copy_len) - head_copy_len = newheadroom; - else - head_copy_off = newheadroom - head_copy_len; - - /* Copy the linear header and data. */ - if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, - skb->len + head_copy_len)) - BUG(); - - copy_skb_header(n, skb); - - off = newheadroom - oldheadroom; - n->csum_start += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - n->transport_header += off; - n->network_header += off; - n->mac_header += off; -#endif - - return n; -} - -/** - * skb_pad - zero pad the tail of an skb - * @skb: buffer to pad - * @pad: space to pad - * - * Ensure that a buffer is followed by a padding area that is zero - * filled. Used by network drivers which may DMA or transfer data - * beyond the buffer end onto the wire. - * - * May return error in out of memory cases. The skb is freed on error. 
- */ - -int skb_pad(struct sk_buff *skb, int pad) -{ - int err; - int ntail; - - /* If the skbuff is non linear tailroom is always zero.. */ - if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { - memset(skb->data+skb->len, 0, pad); - return 0; - } - - ntail = skb->data_len + pad - (skb->end - skb->tail); - if (likely(skb_cloned(skb) || ntail > 0)) { - err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); - if (unlikely(err)) - goto free_skb; - } - - /* FIXME: The use of this function with non-linear skb's really needs - * to be audited. - */ - err = skb_linearize(skb); - if (unlikely(err)) - goto free_skb; - - memset(skb->data + skb->len, 0, pad); - return 0; - -free_skb: - kfree_skb(skb); - return err; -} - -/** - * skb_put - add data to a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer. If this would - * exceed the total buffer size the kernel will panic. A pointer to the - * first byte of the extra data is returned. - */ -unsigned char *skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp = skb_tail_pointer(skb); - SKB_LINEAR_ASSERT(skb); - skb->tail += len; - skb->len += len; - if (unlikely(skb->tail > skb->end)) - skb_over_panic(skb, len, __builtin_return_address(0)); - return tmp; -} -EXPORT_SYMBOL(skb_put); - -/** - * skb_push - add data to the start of a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer at the buffer - * start. If this would exceed the total buffer headroom the kernel will - * panic. A pointer to the first byte of the extra data is returned. - */ -unsigned char *skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data -= len; - skb->len += len; - if (unlikely(skb->data < skb->head)) - skb_under_panic(skb, len, __builtin_return_address(0)); - return skb->data; -} -EXPORT_SYMBOL(skb_push); - -/** - * skb_pull - remove data from the start of a buffer - * @skb: buffer to use - * @len: amount of data to remove - * - * This function removes data from the start of a buffer, returning - * the memory to the headroom. A pointer to the next data in the buffer - * is returned. Once the data has been pulled future pushes will overwrite - * the old data. - */ -unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) -{ - return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); -} -EXPORT_SYMBOL(skb_pull); - -/** - * skb_trim - remove end from a buffer - * @skb: buffer to alter - * @len: new length - * - * Cut the length of a buffer down by removing data from the tail. If - * the buffer is already under the length specified it is not modified. - * The skb must be linear. - */ -void skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->len > len) - __skb_trim(skb, len); -} -EXPORT_SYMBOL(skb_trim); - -/* Trims skb to length len. It can change skb pointers. 
- */ - -int ___pskb_trim(struct sk_buff *skb, unsigned int len) -{ - struct sk_buff **fragp; - struct sk_buff *frag; - int offset = skb_headlen(skb); - int nfrags = skb_shinfo(skb)->nr_frags; - int i; - int err; - - if (skb_cloned(skb) && - unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) - return err; - - i = 0; - if (offset >= len) - goto drop_pages; - - for (; i < nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; - - if (end < len) { - offset = end; - continue; - } - - skb_shinfo(skb)->frags[i++].size = len - offset; - -drop_pages: - skb_shinfo(skb)->nr_frags = i; - - for (; i < nfrags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - goto done; - } - - for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); - fragp = &frag->next) { - int end = offset + frag->len; - - if (skb_shared(frag)) { - struct sk_buff *nfrag; - - nfrag = skb_clone(frag, GFP_ATOMIC); - if (unlikely(!nfrag)) - return -ENOMEM; - - nfrag->next = frag->next; - kfree_skb(frag); - frag = nfrag; - *fragp = frag; - } - - if (end < len) { - offset = end; - continue; - } - - if (end > len && - unlikely((err = pskb_trim(frag, len - offset)))) - return err; - - if (frag->next) - skb_drop_list(&frag->next); - break; - } - -done: - if (len > skb_headlen(skb)) { - skb->data_len -= skb->len - len; - skb->len = len; - } else { - skb->len = len; - skb->data_len = 0; - skb_set_tail_pointer(skb, len); - } - - return 0; -} - -/** - * __pskb_pull_tail - advance tail of skb header - * @skb: buffer to reallocate - * @delta: number of bytes to advance tail - * - * The function makes a sense only on a fragmented &sk_buff, - * it expands header moving its tail forward and copying necessary - * data from fragmented part. - * - * &sk_buff MUST have reference count of 1. - * - * Returns %NULL (and &sk_buff does not change) if pull failed - * or value of new tail of skb in the case of success. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -/* Moves tail of skb head forward, copying data from fragmented part, - * when it is necessary. - * 1. It may fail due to malloc failure. - * 2. It may change skb pointers. - * - * It is pretty complicated. Luckily, it is called only in exceptional cases. - */ -unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) -{ - /* If skb has not enough free space at tail, get new one - * plus 128 bytes for future expansions. If we have enough - * room at tail, reallocate without expansion only if skb is cloned. - */ - int i, k, eat = (skb->tail + delta) - skb->end; - - if (eat > 0 || skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, - GFP_ATOMIC)) - return NULL; - } - - if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) - BUG(); - - /* Optimization: no fragments, no reasons to preestimate - * size of pulled pages. Superb. - */ - if (!skb_shinfo(skb)->frag_list) - goto pull_pages; - - /* Estimate size of pulled pages. */ - eat = delta; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) - goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; - } - - /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, - * but taking into account that pulling is expected to - * be very rare operation, it is worth to fight against - * further bloating skb head and crucify ourselves here instead. 
- * Pure masohism, indeed. 8)8) - */ - if (eat) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - struct sk_buff *clone = NULL; - struct sk_buff *insp = NULL; - - do { - BUG_ON(!list); - - if (list->len <= eat) { - /* Eaten as whole. */ - eat -= list->len; - list = list->next; - insp = list; - } else { - /* Eaten partially. */ - - if (skb_shared(list)) { - /* Sucks! We need to fork list. :-( */ - clone = skb_clone(list, GFP_ATOMIC); - if (!clone) - return NULL; - insp = list->next; - list = clone; - } else { - /* This may be pulled without - * problems. */ - insp = list; - } - if (!pskb_pull(list, eat)) { - if (clone) - kfree_skb(clone); - return NULL; - } - break; - } - } while (eat); - - /* Free pulled out fragments. */ - while ((list = skb_shinfo(skb)->frag_list) != insp) { - skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); - } - /* And insert new clone at head. */ - if (clone) { - clone->next = list; - skb_shinfo(skb)->frag_list = clone; - } - } - /* Success! Now we may commit changes to skb data. */ - -pull_pages: - eat = delta; - k = 0; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; - } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; - if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; - eat = 0; - } - k++; - } - } - skb_shinfo(skb)->nr_frags = k; - - skb->tail += delta; - skb->data_len -= delta; - - return skb_tail_pointer(skb); -} - -/* Copy some data bits from skb to kernel buffer. */ - -int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) -{ - int i, copy; - int start = skb_headlen(skb); - - if (offset > (int)skb->len - len) - goto fault; - - /* Copy header. */ - if ((copy = start - offset) > 0) { - if (copy > len) - copy = len; - skb_copy_from_linear_data_offset(skb, offset, to, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); - memcpy(to, - vaddr + skb_shinfo(skb)->frags[i].page_offset+ - offset - start, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_bits(list, offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - } - if (!len) - return 0; - -fault: - return -EFAULT; -} - -/* - * Callback from splice_to_pipe(), if we need to release some pages - * at the end of the spd in case we error'ed out in filling the pipe. - */ -static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) -{ - struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private; - - kfree_skb(skb); -} - -/* - * Fill page/offset/length into spd, if it can hold more pages. 
- */ -static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, - unsigned int len, unsigned int offset, - struct sk_buff *skb) -{ - if (unlikely(spd->nr_pages == PIPE_BUFFERS)) - return 1; - - spd->pages[spd->nr_pages] = page; - spd->partial[spd->nr_pages].len = len; - spd->partial[spd->nr_pages].offset = offset; - spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb); - spd->nr_pages++; - return 0; -} - -static inline void __segment_seek(struct page **page, unsigned int *poff, - unsigned int *plen, unsigned int off) -{ - *poff += off; - *page += *poff / PAGE_SIZE; - *poff = *poff % PAGE_SIZE; - *plen -= off; -} - -static inline int __splice_segment(struct page *page, unsigned int poff, - unsigned int plen, unsigned int *off, - unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd) -{ - if (!*len) - return 1; - - /* skip this segment if already processed */ - if (*off >= plen) { - *off -= plen; - return 0; - } - - /* ignore any bits we already processed */ - if (*off) { - __segment_seek(&page, &poff, &plen, *off); - *off = 0; - } - - do { - unsigned int flen = min(*len, plen); - - /* the linear region may spread across several pages */ - flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - - if (spd_fill_page(spd, page, flen, poff, skb)) - return 1; - - __segment_seek(&page, &poff, &plen, flen); - *len -= flen; - - } while (*len && plen); - - return 0; -} - -/* - * Map linear and fragment data from the skb to spd. It reports failure if the - * pipe is full or if we already spliced the requested length. - */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, - struct splice_pipe_desc *spd) -{ - int seg; - - /* - * map the linear part - */ - if (__splice_segment(virt_to_page(skb->data), - (unsigned long) skb->data & (PAGE_SIZE - 1), - skb_headlen(skb), - offset, len, skb, spd)) - return 1; - - /* - * then map the fragments - */ - for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { - const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - - if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd)) - return 1; - } - - return 0; -} - -/* - * Map data from the skb to a pipe. Should handle both the linear part, - * the fragments, and the frag list. It does NOT handle frag lists within - * the frag list, if such a thing exists. We'd probably need to recurse to - * handle that cleanly. - */ -int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, - struct pipe_inode_info *pipe, unsigned int tlen, - unsigned int flags) -{ - struct partial_page partial[PIPE_BUFFERS]; - struct page *pages[PIPE_BUFFERS]; - struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, - .flags = flags, - .ops = &sock_pipe_buf_ops, - .spd_release = sock_spd_release, - }; - struct sk_buff *skb; - - /* - * I'd love to avoid the clone here, but tcp_read_sock() - * ignores reference counts and unconditonally kills the sk_buff - * on return from the actor. - */ - skb = skb_clone(__skb, GFP_KERNEL); - if (unlikely(!skb)) - return -ENOMEM; - - /* - * __skb_splice_bits() only fails if the output has no room left, - * so no point in going over the frag_list for the error case. 
- */ - if (__skb_splice_bits(skb, &offset, &tlen, &spd)) - goto done; - else if (!tlen) - goto done; - - /* - * now see if we have a frag_list to map - */ - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list && tlen; list = list->next) { - if (__skb_splice_bits(list, &offset, &tlen, &spd)) - break; - } - } - -done: - /* - * drop our reference to the clone, the pipe consumption will - * drop the rest. - */ - kfree_skb(skb); - - if (spd.nr_pages) { - int ret; - struct sock *sk = __skb->sk; - - /* - * Drop the socket lock, otherwise we have reverse - * locking dependencies between sk_lock and i_mutex - * here as compared to sendfile(). We enter here - * with the socket lock held, and splice_to_pipe() will - * grab the pipe inode lock. For sendfile() emulation, - * we call into ->sendpage() with the i_mutex lock held - * and networking will grab the socket lock. - */ - release_sock(sk); - ret = splice_to_pipe(pipe, &spd); - lock_sock(sk); - return ret; - } - - return 0; -} - -/** - * skb_store_bits - store bits from kernel buffer to skb - * @skb: destination buffer - * @offset: offset in destination - * @from: source buffer - * @len: number of bytes to copy - * - * Copy the specified number of bytes from the source buffer to the - * destination skb. This function handles all the messy bits of - * traversing fragment lists and such. - */ - -int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) -{ - int i, copy; - int start = skb_headlen(skb); - - if (offset > (int)skb->len - len) - goto fault; - - if ((copy = start - offset) > 0) { - if (copy > len) - copy = len; - skb_copy_to_linear_data_offset(skb, offset, from, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - from += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - int end; - - WARN_ON(start > offset + len); - - end = start + frag->size; - if ((copy = end - offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(frag); - memcpy(vaddr + frag->page_offset + offset - start, - from, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - from += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_store_bits(list, offset - start, - from, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - from += copy; - } - start = end; - } - } - if (!len) - return 0; - -fault: - return -EFAULT; -} - -EXPORT_SYMBOL(skb_store_bits); - -/* Checksum skb data. */ - -__wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int pos = 0; - - /* Checksum header. 
*/ - if (copy > 0) { - if (copy > len) - copy = len; - csum = csum_partial(skb->data + offset, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos = copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - __wsum csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial(vaddr + frag->page_offset + - offset - start, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - __wsum csum2; - if (copy > len) - copy = len; - csum2 = skb_checksum(list, offset - start, - copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - } - BUG_ON(len); - - return csum; -} - -/* Both of above in one bottle. */ - -__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, - u8 *to, int len, __wsum csum) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int pos = 0; - - /* Copy header. */ - if (copy > 0) { - if (copy > len) - copy = len; - csum = csum_partial_copy_nocheck(skb->data + offset, to, - copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos = copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - __wsum csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial_copy_nocheck(vaddr + - frag->page_offset + - offset - start, to, - copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - __wsum csum2; - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - csum2 = skb_copy_and_csum_bits(list, - offset - start, - to, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - } - BUG_ON(len); - return csum; -} - -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) -{ - __wsum csum; - long csstart; - - if (skb->ip_summed == CHECKSUM_PARTIAL) - csstart = skb->csum_start - skb_headroom(skb); - else - csstart = skb_headlen(skb); - - BUG_ON(csstart > skb_headlen(skb)); - - skb_copy_from_linear_data(skb, to, csstart); - - csum = 0; - if (csstart != skb->len) - csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, - skb->len - csstart, 0); - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - long csstuff = csstart + skb->csum_offset; - - *((__sum16 *)(to + csstuff)) = csum_fold(csum); - } -} - -/** - * skb_dequeue - remove from the head of the queue - * 
@list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The head item is - * returned or %NULL if the list is empty. - */ - -struct sk_buff *skb_dequeue(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/** - * skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The tail item is - * returned or %NULL if the list is empty. - */ -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue_tail(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/** - * skb_queue_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function takes the list - * lock and is atomic with respect to other list locking functions. - */ -void skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) - kfree_skb(skb); -} - -/** - * skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_head(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the tail of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_tail(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * skb_unlink - remove a buffer from a list - * @skb: buffer to remove - * @list: list to use - * - * Remove a packet from a list. The list locks are taken and this - * function is atomic with respect to other list locked calls - * - * You must know what list the SKB is on. - */ -void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_unlink(skb, list); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * skb_append - append a buffer - * @old: buffer to insert after - * @newsk: buffer to insert - * @list: list to use - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls. - * A buffer cannot be placed on two lists at the same time. 
- */ -void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_after(list, old, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - - -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * @list: list to use - * - * Place a packet before a given packet in a list. The list locks are - * taken and this function is atomic with respect to other list locked - * calls. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_insert(newsk, old->prev, old, list); - spin_unlock_irqrestore(&list->lock, flags); -} - -static inline void skb_split_inside_header(struct sk_buff *skb, - struct sk_buff* skb1, - const u32 len, const int pos) -{ - int i; - - skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), - pos - len); - /* And move data appendix as is. */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; - - skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - skb1->data_len = skb->data_len; - skb1->len += skb1->data_len; - skb->data_len = 0; - skb->len = len; - skb_set_tail_pointer(skb, len); -} - -static inline void skb_split_no_header(struct sk_buff *skb, - struct sk_buff* skb1, - const u32 len, int pos) -{ - int i, k = 0; - const int nfrags = skb_shinfo(skb)->nr_frags; - - skb_shinfo(skb)->nr_frags = 0; - skb1->len = skb1->data_len = skb->len - len; - skb->len = len; - skb->data_len = len - pos; - - for (i = 0; i < nfrags; i++) { - int size = skb_shinfo(skb)->frags[i].size; - - if (pos + size > len) { - skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; - - if (pos < len) { - /* Split frag. - * We have two variants in this case: - * 1. Move all the frag to the second - * part, if it is possible. F.e. - * this approach is mandatory for TUX, - * where splitting is expensive. - * 2. Split is accurately. We make this. - */ - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb1)->frags[0].page_offset += len - pos; - skb_shinfo(skb1)->frags[0].size -= len - pos; - skb_shinfo(skb)->frags[i].size = len - pos; - skb_shinfo(skb)->nr_frags++; - } - k++; - } else - skb_shinfo(skb)->nr_frags++; - pos += size; - } - skb_shinfo(skb1)->nr_frags = k; -} - -/** - * skb_split - Split fragmented skb to two parts at length len. - * @skb: the buffer to split - * @skb1: the buffer to receive the second part - * @len: new length for skb - */ -void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) -{ - int pos = skb_headlen(skb); - - if (len < pos) /* Split line is inside header. */ - skb_split_inside_header(skb, skb1, len, pos); - else /* Second chunk has no header, nothing to copy. */ - skb_split_no_header(skb, skb1, len, pos); -} - -/** - * skb_prepare_seq_read - Prepare a sequential read of skb data - * @skb: the buffer to read - * @from: lower offset of data to be read - * @to: upper offset of data to be read - * @st: state variable - * - * Initializes the specified state variable. Must be called before - * invoking skb_seq_read() for the first time. 
- */ -void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, - unsigned int to, struct skb_seq_state *st) -{ - st->lower_offset = from; - st->upper_offset = to; - st->root_skb = st->cur_skb = skb; - st->frag_idx = st->stepped_offset = 0; - st->frag_data = NULL; -} - -/** - * skb_seq_read - Sequentially read skb data - * @consumed: number of bytes consumed by the caller so far - * @data: destination pointer for data to be returned - * @st: state variable - * - * Reads a block of skb data at &consumed relative to the - * lower offset specified to skb_prepare_seq_read(). Assigns - * the head of the data block to &data and returns the length - * of the block or 0 if the end of the skb data or the upper - * offset has been reached. - * - * The caller is not required to consume all of the data - * returned, i.e. &consumed is typically set to the number - * of bytes already consumed and the next call to - * skb_seq_read() will return the remaining part of the block. - * - * Note 1: The size of each block of data returned can be arbitary, - * this limitation is the cost for zerocopy seqeuental - * reads of potentially non linear data. - * - * Note 2: Fragment lists within fragments are not implemented - * at the moment, state->root_skb could be replaced with - * a stack for this purpose. - */ -unsigned int skb_seq_read(unsigned int consumed, const u8 **data, - struct skb_seq_state *st) -{ - unsigned int block_limit, abs_offset = consumed + st->lower_offset; - skb_frag_t *frag; - - if (unlikely(abs_offset >= st->upper_offset)) - return 0; - -next_skb: - block_limit = skb_headlen(st->cur_skb); - - if (abs_offset < block_limit) { - *data = st->cur_skb->data + abs_offset; - return block_limit - abs_offset; - } - - if (st->frag_idx == 0 && !st->frag_data) - st->stepped_offset += skb_headlen(st->cur_skb); - - while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { - frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = frag->size + st->stepped_offset; - - if (abs_offset < block_limit) { - if (!st->frag_data) - st->frag_data = kmap_skb_frag(frag); - - *data = (u8 *) st->frag_data + frag->page_offset + - (abs_offset - st->stepped_offset); - - return block_limit - abs_offset; - } - - if (st->frag_data) { - kunmap_skb_frag(st->frag_data); - st->frag_data = NULL; - } - - st->frag_idx++; - st->stepped_offset += frag->size; - } - - if (st->frag_data) { - kunmap_skb_frag(st->frag_data); - st->frag_data = NULL; - } - - if (st->cur_skb->next) { - st->cur_skb = st->cur_skb->next; - st->frag_idx = 0; - goto next_skb; - } else if (st->root_skb == st->cur_skb && - skb_shinfo(st->root_skb)->frag_list) { - st->cur_skb = skb_shinfo(st->root_skb)->frag_list; - goto next_skb; - } - - return 0; -} - -/** - * skb_abort_seq_read - Abort a sequential read of skb data - * @st: state variable - * - * Must be called if skb_seq_read() was not called until it - * returned 0. 
- */ -void skb_abort_seq_read(struct skb_seq_state *st) -{ - if (st->frag_data) - kunmap_skb_frag(st->frag_data); -} - -#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) - -static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, - struct ts_config *conf, - struct ts_state *state) -{ - return skb_seq_read(offset, text, TS_SKB_CB(state)); -} - -static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) -{ - skb_abort_seq_read(TS_SKB_CB(state)); -} - -/** - * skb_find_text - Find a text pattern in skb data - * @skb: the buffer to look in - * @from: search offset - * @to: search limit - * @config: textsearch configuration - * @state: uninitialized textsearch state variable - * - * Finds a pattern in the skb data according to the specified - * textsearch configuration. Use textsearch_next() to retrieve - * subsequent occurrences of the pattern. Returns the offset - * to the first occurrence or UINT_MAX if no match was found. - */ -unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state) -{ - unsigned int ret; - - config->get_next_block = skb_ts_get_next_block; - config->finish = skb_ts_finish; - - skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); - - ret = textsearch_find(config, state); - return (ret <= to - from ? ret : UINT_MAX); -} - -/** - * skb_append_datato_frags: - append the user data to a skb - * @sk: sock structure - * @skb: skb structure to be appened with user data. - * @getfrag: call back function to be used for getting the user data - * @from: pointer to user message iov - * @length: length of the iov message - * - * Description: This procedure append the user data in the fragment part - * of the skb if any page alloc fails user this procedure returns -ENOMEM - */ -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int (*getfrag)(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length) -{ - int frg_cnt = 0; - skb_frag_t *frag = NULL; - struct page *page = NULL; - int copy, left; - int offset = 0; - int ret; - - do { - /* Return error if we don't have space for new frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - if (frg_cnt >= MAX_SKB_FRAGS) - return -EFAULT; - - /* allocate a new page for next frag */ - page = alloc_pages(sk->sk_allocation, 0); - - /* If alloc_page fails just return failure and caller will - * free previous allocated pages by doing kfree_skb() - */ - if (page == NULL) - return -ENOMEM; - - /* initialize the next frag */ - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; - skb_fill_page_desc(skb, frg_cnt, page, 0, 0); - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); - - /* get the new initialized frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; - - /* copy the user data to page */ - left = PAGE_SIZE - frag->page_offset; - copy = (length > left)? 
left : length; - - ret = getfrag(from, (page_address(frag->page) + - frag->page_offset + frag->size), - offset, copy, 0, skb); - if (ret < 0) - return -EFAULT; - - /* copy was successful so update the size parameters */ - sk->sk_sndmsg_off += copy; - frag->size += copy; - skb->len += copy; - skb->data_len += copy; - offset += copy; - length -= copy; - - } while (length > 0); - - return 0; -} - -/** - * skb_pull_rcsum - pull skb and update receive checksum - * @skb: buffer to update - * @len: length of data pulled - * - * This function performs an skb_pull on the packet and updates - * the CHECKSUM_COMPLETE checksum. It should be used on - * receive path processing instead of skb_pull unless you know - * that the checksum difference is zero (e.g., a valid IP header) - * or you are setting ip_summed to CHECKSUM_NONE. - */ -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) -{ - BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; -} - -EXPORT_SYMBOL_GPL(skb_pull_rcsum); - -/** - * skb_segment - Perform protocol segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * - * This function performs segmentation on the given skb. It returns - * a pointer to the first in a list of new skbs for the segments. - * In case of error it returns ERR_PTR(err). - */ -struct sk_buff *skb_segment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = NULL; - struct sk_buff *tail = NULL; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int doffset = skb->data - skb_mac_header(skb); - unsigned int offset = doffset; - unsigned int headroom; - unsigned int len; - int sg = features & NETIF_F_SG; - int nfrags = skb_shinfo(skb)->nr_frags; - int err = -ENOMEM; - int i = 0; - int pos; - - __skb_push(skb, doffset); - headroom = skb_headroom(skb); - pos = skb_headlen(skb); - - do { - struct sk_buff *nskb; - skb_frag_t *frag; - int hsize; - int k; - int size; - - len = skb->len - offset; - if (len > mss) - len = mss; - - hsize = skb_headlen(skb) - offset; - if (hsize < 0) - hsize = 0; - if (hsize > len || !sg) - hsize = len; - - nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC); - if (unlikely(!nskb)) - goto err; - - if (segs) - tail->next = nskb; - else - segs = nskb; - tail = nskb; - - __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; - - skb_reserve(nskb, headroom); - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb->mac_len); - nskb->transport_header = (nskb->network_header + - skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, skb_put(nskb, doffset), - doffset); - if (!sg) { - nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(skb, offset, - skb_put(nskb, len), - len, 0); - continue; - } - - frag = skb_shinfo(nskb)->frags; - k = 0; - - skb_copy_from_linear_data_offset(skb, offset, - skb_put(nskb, hsize), hsize); - - while (pos < offset + len) { - BUG_ON(i >= nfrags); - - *frag = skb_shinfo(skb)->frags[i]; - get_page(frag->page); - size = frag->size; - - if (pos < offset) { - frag->page_offset += offset - pos; - frag->size -= offset - pos; - } - - k++; - - if (pos + size <= offset + len) { - i++; - pos += size; - } else { - frag->size -= pos + size - (offset + len); - break; - } - - frag++; - } - - skb_shinfo(nskb)->nr_frags = k; - nskb->data_len = len - hsize; - nskb->len += nskb->data_len; - nskb->truesize += nskb->data_len; - } while ((offset += len) < 
skb->len); - - return segs; - -err: - while ((skb = segs)) { - segs = skb->next; - kfree_skb(skb); - } - return ERR_PTR(err); -} - -EXPORT_SYMBOL_GPL(skb_segment); - -void __init skb_init(void) -{ - skbuff_head_cache = kmem_cache_create("skbuff_head_cache", - sizeof(struct sk_buff), - 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); - skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", - (2*sizeof(struct sk_buff)) + - sizeof(atomic_t), - 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); -} - -/** - * skb_to_sgvec - Fill a scatter-gather list from a socket buffer - * @skb: Socket buffer containing the buffers to be mapped - * @sg: The scatter-gather list to map into - * @offset: The offset into the buffer's contents to start mapping - * @len: Length of buffer space to be mapped - * - * Fill the specified scatter-gather list with mappings/pointers into a - * region of the buffer space attached to a socket buffer. - */ -static int -__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int elt = 0; - - if (copy > 0) { - if (copy > len) - copy = len; - sg_set_buf(sg, skb->data + offset, copy); - elt++; - if ((len -= copy) == 0) - return elt; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - sg_set_page(&sg[elt], frag->page, copy, - frag->page_offset+offset-start); - elt++; - if (!(len -= copy)) - return elt; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - elt += __skb_to_sgvec(list, sg+elt, offset - start, - copy); - if ((len -= copy) == 0) - return elt; - offset += copy; - } - start = end; - } - } - BUG_ON(len); - return elt; -} - -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int nsg = __skb_to_sgvec(skb, sg, offset, len); - - sg_mark_end(&sg[nsg - 1]); - - return nsg; -} - -/** - * skb_cow_data - Check that a socket buffer's data buffers are writable - * @skb: The socket buffer to check. - * @tailbits: Amount of trailing space to be added - * @trailer: Returned pointer to the skb where the @tailbits space begins - * - * Make sure that the data buffers attached to a socket buffer are - * writable. If they are not, private copies are made of the data buffers - * and the socket buffer is set to use these instead. - * - * If @tailbits is given, make sure that there is space to write @tailbits - * bytes of data beyond current end of socket buffer. @trailer will be - * set to point to the skb in which this space begins. - * - * The number of scatterlist elements required to completely map the - * COW'd and extended socket buffer will be returned. - */ -int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) -{ - int copyflag; - int elt; - struct sk_buff *skb1, **skb_p; - - /* If skb is cloned or its head is paged, reallocate - * head pulling out all the pages (pages are considered not writable - * at the moment even if they are anonymous). 
- */ - if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && - __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) - return -ENOMEM; - - /* Easy case. Most of packets will go this way. */ - if (!skb_shinfo(skb)->frag_list) { - /* A little of trouble, not enough of space for trailer. - * This should not happen, when stack is tuned to generate - * good frames. OK, on miss we reallocate and reserve even more - * space, 128 bytes is fair. */ - - if (skb_tailroom(skb) < tailbits && - pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) - return -ENOMEM; - - /* Voila! */ - *trailer = skb; - return 1; - } - - /* Misery. We are in troubles, going to mincer fragments... */ - - elt = 1; - skb_p = &skb_shinfo(skb)->frag_list; - copyflag = 0; - - while ((skb1 = *skb_p) != NULL) { - int ntail = 0; - - /* The fragment is partially pulled by someone, - * this can happen on input. Copy it and everything - * after it. */ - - if (skb_shared(skb1)) - copyflag = 1; - - /* If the skb is the last, worry about trailer. */ - - if (skb1->next == NULL && tailbits) { - if (skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list || - skb_tailroom(skb1) < tailbits) - ntail = tailbits + 128; - } - - if (copyflag || - skb_cloned(skb1) || - ntail || - skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list) { - struct sk_buff *skb2; - - /* Fuck, we are miserable poor guys... */ - if (ntail == 0) - skb2 = skb_copy(skb1, GFP_ATOMIC); - else - skb2 = skb_copy_expand(skb1, - skb_headroom(skb1), - ntail, - GFP_ATOMIC); - if (unlikely(skb2 == NULL)) - return -ENOMEM; - - if (skb1->sk) - skb_set_owner_w(skb2, skb1->sk); - - /* Looking around. Are we still alive? - * OK, link new skb, drop old one */ - - skb2->next = skb1->next; - *skb_p = skb2; - kfree_skb(skb1); - skb1 = skb2; - } - elt++; - *trailer = skb1; - skb_p = &skb1->next; - } - - return elt; -} - -/** - * skb_partial_csum_set - set up and verify partial csum values for packet - * @skb: the skb to set - * @start: the number of bytes after skb->data to start checksumming. - * @off: the offset from start to place the checksum. - * - * For untrusted partially-checksummed packets, we need to make sure the values - * for skb->csum_start and skb->csum_offset are valid so we don't oops. - * - * This function checks and sets those values and skb->ip_summed: if this - * returns false you should drop the packet. 
- */ -bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) -{ - if (unlikely(start > skb->len - 2) || - unlikely((int)start + off > skb->len - 2)) { - if (net_ratelimit()) - printk(KERN_WARNING - "bad partial csum: csum=%u/%u len=%u\n", - start, off, skb->len); - return false; - } - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + start; - skb->csum_offset = off; - return true; -} - -void __skb_warn_lro_forwarding(const struct sk_buff *skb) -{ - if (net_ratelimit()) - pr_warning("%s: received packets cannot be forwarded" - " while LRO is enabled\n", skb->dev->name); -} - -EXPORT_SYMBOL(___pskb_trim); -EXPORT_SYMBOL(__kfree_skb); -EXPORT_SYMBOL(kfree_skb); -EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(__alloc_skb); -EXPORT_SYMBOL(__netdev_alloc_skb); -EXPORT_SYMBOL(pskb_copy); -EXPORT_SYMBOL(pskb_expand_head); -EXPORT_SYMBOL(skb_checksum); -EXPORT_SYMBOL(skb_clone); -EXPORT_SYMBOL(skb_copy); -EXPORT_SYMBOL(skb_copy_and_csum_bits); -EXPORT_SYMBOL(skb_copy_and_csum_dev); -EXPORT_SYMBOL(skb_copy_bits); -EXPORT_SYMBOL(skb_copy_expand); -EXPORT_SYMBOL(skb_over_panic); -EXPORT_SYMBOL(skb_pad); -EXPORT_SYMBOL(skb_realloc_headroom); -EXPORT_SYMBOL(skb_under_panic); -EXPORT_SYMBOL(skb_dequeue); -EXPORT_SYMBOL(skb_dequeue_tail); -EXPORT_SYMBOL(skb_insert); -EXPORT_SYMBOL(skb_queue_purge); -EXPORT_SYMBOL(skb_queue_head); -EXPORT_SYMBOL(skb_queue_tail); -EXPORT_SYMBOL(skb_unlink); -EXPORT_SYMBOL(skb_append); -EXPORT_SYMBOL(skb_split); -EXPORT_SYMBOL(skb_prepare_seq_read); -EXPORT_SYMBOL(skb_seq_read); -EXPORT_SYMBOL(skb_abort_seq_read); -EXPORT_SYMBOL(skb_find_text); -EXPORT_SYMBOL(skb_append_datato_frags); -EXPORT_SYMBOL(__skb_warn_lro_forwarding); - -EXPORT_SYMBOL_GPL(skb_to_sgvec); -EXPORT_SYMBOL_GPL(skb_cow_data); -EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff -Nurb linux-2.6.27-720/net/ipv4/arp.c linux-2.6.27-710/net/ipv4/arp.c --- linux-2.6.27-720/net/ipv4/arp.c 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/net/ipv4/arp.c 2008-10-09 18:13:53.000000000 -0400 @@ -328,7 +328,6 @@ { __be32 saddr = 0; u8 *dst_ha = NULL; - u8 dst_ha_buf[MAX_ADDR_LEN+sizeof(unsigned long)]; struct net_device *dev = neigh->dev; __be32 target = *(__be32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); @@ -366,8 +365,8 @@ if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state&NUD_VALID)) printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); - memcpy(dst_ha_buf, neigh->ha, sizeof(neigh->ha)); - dst_ha = dst_ha_buf; + dst_ha = neigh->ha; + read_lock_bh(&neigh->lock); } else if ((probes -= neigh->parms->app_probes) < 0) { #ifdef CONFIG_ARPD neigh_app_ns(neigh); @@ -377,6 +376,8 @@ arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_ha, dev->dev_addr, NULL); + if (dst_ha) + read_unlock_bh(&neigh->lock); } static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) diff -Nurb linux-2.6.27-720/net/ipv4/devinet.c linux-2.6.27-710/net/ipv4/devinet.c --- linux-2.6.27-720/net/ipv4/devinet.c 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/net/ipv4/devinet.c 2009-05-04 12:15:14.000000000 -0400 @@ -1687,4 +1687,3 @@ EXPORT_SYMBOL(inetdev_by_index); EXPORT_SYMBOL(register_inetaddr_notifier); EXPORT_SYMBOL(unregister_inetaddr_notifier); -EXPORT_SYMBOL(devinet_ioctl); diff -Nurb linux-2.6.27-720/scr linux-2.6.27-710/scr --- linux-2.6.27-720/scr 2009-05-04 12:18:34.000000000 -0400 +++ linux-2.6.27-710/scr 1969-12-31 19:00:00.000000000 -0500 @@ -1,29 +0,0 @@ -vi -o ./include/asm-um/elf-x86_64.h 
./include/asm-um/elf-x86_64.h.rej
-vi -o ./include/asm-frv/tlbflush.h ./include/asm-frv/tlbflush.h.rej
-vi -o ./include/asm-frv/pgtable.h ./include/asm-frv/pgtable.h.rej
-vi -o ./include/asm-xtensa/pgalloc.h ./include/asm-xtensa/pgalloc.h.rej
-vi -o ./include/asm-xtensa/pgtable.h ./include/asm-xtensa/pgtable.h.rej
-vi -o ./include/asm-xtensa/module.h ./include/asm-xtensa/module.h.rej
-vi -o ./include/asm-xtensa/system.h ./include/asm-xtensa/system.h.rej
-vi -o ./include/asm-xtensa/timex.h ./include/asm-xtensa/timex.h.rej
-vi -o ./include/net/sock.h ./include/net/sock.h.rej
-vi -o ./include/net/compat.h ./include/net/compat.h.rej
-vi -o ./include/asm-mips/fpu.h ./include/asm-mips/fpu.h.rej
-vi -o ./include/asm-mips/time.h ./include/asm-mips/time.h.rej
-vi -o ./include/asm-mips/irq.h ./include/asm-mips/irq.h.rej
-vi -o ./include/asm-mips/system.h ./include/asm-mips/system.h.rej
-vi -o ./include/linux/lockdep.h ./include/linux/lockdep.h.rej
-vi -o ./include/linux/ktime.h ./include/linux/ktime.h.rej
-vi -o ./include/linux/textsearch.h ./include/linux/textsearch.h.rej
-vi -o ./include/linux/backing-dev.h ./include/linux/backing-dev.h.rej
-vi -o ./include/linux/kexec.h ./include/linux/kexec.h.rej
-vi -o ./include/linux/sysctl.h ./include/linux/sysctl.h.rej
-vi -o ./include/linux/netdevice.h ./include/linux/netdevice.h.rej
-vi -o ./include/linux/spinlock_types_up.h ./include/linux/spinlock_types_up.h.rej
-vi -o ./include/linux/list.h ./include/linux/list.h.rej
-vi -o ./include/asm-m68k/module.h ./include/asm-m68k/module.h.rej
-vi -o ./net/core/dev.c ./net/core/dev.c.rej
-vi -o ./net/core/skbuff.c ./net/core/skbuff.c.rej
-vi -o ./net/core/neighbour.c ./net/core/neighbour.c.rej
-vi -o ./drivers/media/dvb/bt8xx/bt878.h ./drivers/media/dvb/bt8xx/bt878.h.rej
-vi -o ./drivers/net/ibmveth.c ./drivers/net/ibmveth.c.rej
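
Illustrative note (not part of the patch): the net/ipv4/arp.c hunk above shows the one substantive behavioural difference in this section. The -720 tree snapshots neigh->ha into an on-stack dst_ha_buf (without taking neigh->lock) and works from the copy, while mainline -710 points dst_ha at the live neigh->ha and holds read_lock_bh(&neigh->lock) across arp_send(). The sketch below is a minimal userspace stand-in for those two patterns only; toy_neigh, send_probe and the pthread rwlock are inventions for the sketch, not kernel APIs.

/* Build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_MAX_ADDR_LEN 32

struct toy_neigh {
	pthread_rwlock_t lock;              /* stands in for neigh->lock */
	uint8_t ha[TOY_MAX_ADDR_LEN];       /* stands in for neigh->ha */
};

static void send_probe(const uint8_t *dst_ha)
{
	/* placeholder for arp_send(); just shows the address is usable */
	printf("probe to %02x:...\n", dst_ha ? dst_ha[0] : 0);
}

/*
 * Pattern of the -720 hunk: copy the address, then work from the private
 * buffer with no lock held during the send.  (The patch itself does the
 * memcpy without taking the lock; the brief read lock here is a more
 * conservative variation of the same idea.)
 */
static void probe_with_copy(struct toy_neigh *n)
{
	uint8_t dst_ha_buf[TOY_MAX_ADDR_LEN];

	pthread_rwlock_rdlock(&n->lock);
	memcpy(dst_ha_buf, n->ha, sizeof(n->ha));
	pthread_rwlock_unlock(&n->lock);

	send_probe(dst_ha_buf);             /* safe: private copy */
}

/* Pattern of mainline -710: hold the read lock across the use. */
static void probe_under_lock(struct toy_neigh *n)
{
	pthread_rwlock_rdlock(&n->lock);
	send_probe(n->ha);                  /* safe: writers are excluded */
	pthread_rwlock_unlock(&n->lock);
}

int main(void)
{
	struct toy_neigh n = { .ha = { 0xde, 0xad } };

	pthread_rwlock_init(&n.lock, NULL);
	probe_with_copy(&n);
	probe_under_lock(&n);
	pthread_rwlock_destroy(&n.lock);
	return 0;
}

The trade-off, as far as the hunk shows it: the copy variant avoids holding the neighbour lock across arp_send() at the cost of an extra stack buffer and a potentially stale address, while the locked variant guarantees a consistent hardware address for the duration of the send.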