X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fsysctl.c;h=34df070e58960de9d50aca6b63b3b24ca316d8e2;hb=refs%2Fheads%2Fvserver;hp=e95f475d0bd9d3adb349f89dc6e6d102a59519cb;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e95f475d0..34df070e5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -18,19 +18,21 @@ * Removed it and replaced it with older style, 03/23/00, Bill Wendling */ -#include #include #include #include #include #include #include +#include #include #include #include #include #include #include +#include +#include #include #include #include @@ -39,56 +41,81 @@ #include #include #include +#include +#include +#include +#include + #include +#include -#ifdef CONFIG_ROOT_NFS -#include +extern int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + +#ifdef CONFIG_X86 +#include +#include #endif #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ -extern int panic_timeout; extern int C_A_D; extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; +extern int sysctl_panic_on_oom; extern int max_threads; -extern atomic_t nr_queued_signals; -extern int max_queued_signals; -extern int sysrq_enabled; extern int core_uses_pid; +extern int suid_dumpable; extern char core_pattern[]; -extern int cad_pid; extern int pid_max; -extern int sysctl_lower_zone_protection; extern int min_free_kbytes; extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; +extern int pid_max_min, pid_max_max; +extern int sysctl_drop_caches; +extern int percpu_pagelist_fraction; +extern int compat_log; + +int exec_shield = (1<<0); +/* exec_shield is a bitmask: + 0: off; vdso at STACK_TOP, 1 page below TASK_SIZE + (1<<0) 1: on [also on if !=0] + (1<<1) 2: force noexecstack regardless of PT_GNU_STACK + The old settings + (1<<2) 4: vdso just below .text of main (unless too low) + (1<<3) 8: vdso just below .text of PT_INTERP (unless too low) + are ignored because the vdso is placed completely randomly +*/ + +static int __init setup_exec_shield(char *str) +{ + get_option (&str, &exec_shield); + + return 1; +} + +__setup("exec-shield=", setup_exec_shield); /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; +static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; #ifdef CONFIG_KMOD extern char modprobe_path[]; #endif -#ifdef CONFIG_HOTPLUG -extern char hotplug_path[]; -#endif extern char vshelper_path[]; #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif #ifdef CONFIG_SYSVIPC -extern size_t shm_ctlmax; -extern size_t shm_ctlall; -extern int shm_ctlmni; -extern int msg_ctlmax; -extern int msg_ctlmnb; -extern int msg_ctlmni; -extern int sem_ctls[]; +static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif #ifdef __sparc__ @@ -102,29 +129,50 @@ extern int pwrsw_enabled; extern int unaligned_enabled; #endif -#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_S390 #ifdef CONFIG_MATHEMU extern int sysctl_ieee_emulation_warnings; #endif extern int sysctl_userprocess_debug; +extern int spin_retry; #endif extern int sysctl_hz_timer; -#if defined(CONFIG_PPC32) && defined(CONFIG_6xx) -extern unsigned long powersave_nap; -int proc_dol2crvec(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp); -#endif - #ifdef CONFIG_BSD_PROCESS_ACCT extern int acct_parm[]; #endif -static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, - ctl_table *, void **); -static int proc_doutsstring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp); +#ifdef CONFIG_IA64 +extern int no_unaligned_warning; +#endif + +#ifdef CONFIG_RT_MUTEXES +extern int max_lock_depth; +#endif + +#ifdef CONFIG_SYSCTL_SYSCALL +static int parse_table(int __user *, int, void __user *, size_t __user *, + void __user *, size_t, ctl_table *); +#endif + +static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + +static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen); + +#ifdef CONFIG_SYSVIPC +static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen); +#endif + +#ifdef CONFIG_PROC_SYSCTL +static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); +#endif static ctl_table root_table[]; static struct ctl_table_header root_table_header = @@ -132,10 +180,6 @@ static struct ctl_table_header root_table_header = static ctl_table kern_table[]; static ctl_table vm_table[]; -#ifdef CONFIG_NET -extern ctl_table net_table[]; -#endif -static ctl_table proc_table[]; static ctl_table fs_table[]; static ctl_table debug_table[]; static ctl_table dev_table[]; @@ -143,16 +187,57 @@ extern ctl_table random_table[]; #ifdef CONFIG_UNIX98_PTYS extern ctl_table pty_table[]; #endif +#ifdef CONFIG_INOTIFY_USER +extern ctl_table inotify_table[]; +#endif + +#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT +int sysctl_legacy_va_layout; +#endif + +static void *get_uts(ctl_table *table, int write) +{ + char *which = table->data; +#ifdef CONFIG_UTS_NS + struct uts_namespace *uts_ns = current->nsproxy->uts_ns; + which = (which - (char *)&init_uts_ns) + (char *)uts_ns; +#endif + if (!write) + down_read(&uts_sem); + else + down_write(&uts_sem); + return which; +} + +static void put_uts(ctl_table *table, int write, void *which) +{ + if (!write) + up_read(&uts_sem); + else + up_write(&uts_sem); +} + +#ifdef CONFIG_SYSVIPC +static void *get_ipc(ctl_table *table, int write) +{ + char *which = table->data; + struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; + which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; + return which; +} +#else +#define get_ipc(T,W) ((T)->data) +#endif /* /proc declarations: */ -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); static int proc_opensys(struct inode *, struct file *); -struct file_operations proc_sys_file_operations = { +const struct file_operations proc_sys_file_operations = { .open = proc_opensys, .read = proc_readsys, .write = proc_writesys, @@ -160,7 +245,7 @@ struct file_operations proc_sys_file_operations = { extern struct proc_dir_entry *proc_sys_root; -static void register_proc_table(ctl_table *, struct proc_dir_entry *); +static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif @@ -187,12 +272,6 @@ static ctl_table root_table[] = { .child = net_table, }, #endif - { - .ctl_name = CTL_PROC, - .procname = "proc", - .mode = 0555, - .child = proc_table, - }, { .ctl_name = CTL_FS, .procname = "fs", @@ -211,6 +290,7 @@ static ctl_table root_table[] = { .mode = 0555, .child = dev_table, }, + { .ctl_name = 0 } }; @@ -218,47 +298,47 @@ static ctl_table kern_table[] = { { .ctl_name = KERN_OSTYPE, .procname = "ostype", - .data = system_utsname.sysname, - .maxlen = sizeof(system_utsname.sysname), + .data = init_uts_ns.name.sysname, + .maxlen = sizeof(init_uts_ns.name.sysname), .mode = 0444, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .proc_handler = &proc_do_uts_string, + .strategy = &sysctl_uts_string, }, { .ctl_name = KERN_OSRELEASE, .procname = "osrelease", - .data = system_utsname.release, - .maxlen = sizeof(system_utsname.release), + .data = init_uts_ns.name.release, + .maxlen = sizeof(init_uts_ns.name.release), .mode = 0444, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .proc_handler = &proc_do_uts_string, + .strategy = &sysctl_uts_string, }, { .ctl_name = KERN_VERSION, .procname = "version", - .data = system_utsname.version, - .maxlen = sizeof(system_utsname.version), + .data = init_uts_ns.name.version, + .maxlen = sizeof(init_uts_ns.name.version), .mode = 0444, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .proc_handler = &proc_do_uts_string, + .strategy = &sysctl_uts_string, }, { .ctl_name = KERN_NODENAME, .procname = "hostname", - .data = system_utsname.nodename, - .maxlen = sizeof(system_utsname.nodename), + .data = init_uts_ns.name.nodename, + .maxlen = sizeof(init_uts_ns.name.nodename), .mode = 0644, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .proc_handler = &proc_do_uts_string, + .strategy = &sysctl_uts_string, }, { .ctl_name = KERN_DOMAINNAME, .procname = "domainname", - .data = system_utsname.domainname, - .maxlen = sizeof(system_utsname.domainname), + .data = init_uts_ns.name.domainname, + .maxlen = sizeof(init_uts_ns.name.domainname), .mode = 0644, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .proc_handler = &proc_do_uts_string, + .strategy = &sysctl_uts_string, }, { .ctl_name = KERN_PANIC, @@ -268,6 +348,22 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = KERN_EXEC_SHIELD, + .procname = "exec-shield", + .data = &exec_shield, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_PRINT_FATAL, + .procname = "print-fatal-signals", + .data = &print_fatal_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_CORE_USES_PID, .procname = "core_uses_pid", @@ -280,7 +376,7 @@ static ctl_table kern_table[] = { .ctl_name = KERN_CORE_PATTERN, .procname = "core_pattern", .data = core_pattern, - .maxlen = 64, + .maxlen = 128, .mode = 0644, .proc_handler = &proc_dostring, .strategy = &sysctl_string, @@ -290,7 +386,7 @@ static ctl_table kern_table[] = { .procname = "tainted", .data = &tainted, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = &proc_dointvec, }, { @@ -355,22 +451,6 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#endif -#if defined(CONFIG_PPC32) && defined(CONFIG_6xx) - { - .ctl_name = KERN_PPC_POWERSAVE_NAP, - .procname = "powersave-nap", - .data = &powersave_nap, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = KERN_PPC_L2CR, - .procname = "l2cr", - .mode = 0644, - .proc_handler = &proc_dol2crvec, - }, #endif { .ctl_name = KERN_CTLALTDEL, @@ -399,12 +479,12 @@ static ctl_table kern_table[] = { .strategy = &sysctl_string, }, #endif -#ifdef CONFIG_HOTPLUG +#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) { .ctl_name = KERN_HOTPLUG, .procname = "hotplug", - .data = &hotplug_path, - .maxlen = KMOD_PATH_LEN, + .data = &uevent_helper, + .maxlen = UEVENT_HELPER_PATH_LEN, .mode = 0644, .proc_handler = &proc_dostring, .strategy = &sysctl_string, @@ -439,98 +519,91 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif - { - .ctl_name = KERN_RTSIGNR, - .procname = "rtsig-nr", - .data = &nr_queued_signals, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = KERN_RTSIGMAX, - .procname = "rtsig-max", - .data = &max_queued_signals, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, #ifdef CONFIG_SYSVIPC { .ctl_name = KERN_SHMMAX, .procname = "shmmax", - .data = &shm_ctlmax, - .maxlen = sizeof (size_t), + .data = &init_ipc_ns.shm_ctlmax, + .maxlen = sizeof (init_ipc_ns.shm_ctlmax), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_ipc_doulongvec_minmax, + .strategy = sysctl_ipc_data, }, { .ctl_name = KERN_SHMALL, .procname = "shmall", - .data = &shm_ctlall, - .maxlen = sizeof (size_t), + .data = &init_ipc_ns.shm_ctlall, + .maxlen = sizeof (init_ipc_ns.shm_ctlall), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_ipc_doulongvec_minmax, + .strategy = sysctl_ipc_data, }, { .ctl_name = KERN_SHMMNI, .procname = "shmmni", - .data = &shm_ctlmni, - .maxlen = sizeof (int), + .data = &init_ipc_ns.shm_ctlmni, + .maxlen = sizeof (init_ipc_ns.shm_ctlmni), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_ipc_dointvec, + .strategy = sysctl_ipc_data, }, { .ctl_name = KERN_MSGMAX, .procname = "msgmax", - .data = &msg_ctlmax, - .maxlen = sizeof (int), + .data = &init_ipc_ns.msg_ctlmax, + .maxlen = sizeof (init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_ipc_dointvec, + .strategy = sysctl_ipc_data, }, { .ctl_name = KERN_MSGMNI, .procname = "msgmni", - .data = &msg_ctlmni, - .maxlen = sizeof (int), + .data = &init_ipc_ns.msg_ctlmni, + .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_ipc_dointvec, + .strategy = sysctl_ipc_data, }, { .ctl_name = KERN_MSGMNB, .procname = "msgmnb", - .data = &msg_ctlmnb, - .maxlen = sizeof (int), + .data = &init_ipc_ns.msg_ctlmnb, + .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_ipc_dointvec, + .strategy = sysctl_ipc_data, }, { .ctl_name = KERN_SEM, .procname = "sem", - .data = &sem_ctls, + .data = &init_ipc_ns.sem_ctls, .maxlen = 4*sizeof (int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_ipc_dointvec, + .strategy = sysctl_ipc_data, }, #endif #ifdef CONFIG_MAGIC_SYSRQ { .ctl_name = KERN_SYSRQ, .procname = "sysrq", - .data = &sysrq_enabled, + .data = &__sysrq_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_PROC_SYSCTL { .ctl_name = KERN_CADPID, .procname = "cad_pid", - .data = &cad_pid, + .data = NULL, .maxlen = sizeof (int), .mode = 0600, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_do_cad_pid, }, +#endif { .ctl_name = KERN_MAX_THREADS, .procname = "threads-max", @@ -575,7 +648,7 @@ static ctl_table kern_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, -#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_S390 #ifdef CONFIG_MATHEMU { .ctl_name = KERN_IEEE_EMULATION_WARNINGS, @@ -611,7 +684,10 @@ static ctl_table kern_table[] = { .data = &pid_max, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .strategy = sysctl_intvec, + .extra1 = &pid_max_min, + .extra2 = &pid_max_max, }, { .ctl_name = KERN_PANIC_ON_OOPS, @@ -646,6 +722,111 @@ static ctl_table kern_table[] = { .mode = 0444, .proc_handler = &proc_dointvec, }, +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) + { + .ctl_name = KERN_UNKNOWN_NMI_PANIC, + .procname = "unknown_nmi_panic", + .data = &unknown_nmi_panic, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_NMI_WATCHDOG, + .procname = "nmi_watchdog", + .data = &nmi_watchdog_enabled, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_nmi_enabled, + }, +#endif +#if defined(CONFIG_X86) + { + .ctl_name = KERN_PANIC_ON_NMI, + .procname = "panic_on_unrecovered_nmi", + .data = &panic_on_unrecovered_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_BOOTLOADER_TYPE, + .procname = "bootloader_type", + .data = &bootloader_type, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kstack_depth_to_print", + .data = &kstack_depth_to_print, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#if defined(CONFIG_MMU) + { + .ctl_name = KERN_RANDOMIZE, + .procname = "randomize_va_space", + .data = &randomize_va_space, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#if defined(CONFIG_S390) && defined(CONFIG_SMP) + { + .ctl_name = KERN_SPIN_RETRY, + .procname = "spin_retry", + .data = &spin_retry, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_ACPI_SLEEP + { + .ctl_name = KERN_ACPI_VIDEO_FLAGS, + .procname = "acpi_video_flags", + .data = &acpi_video_flags, + .maxlen = sizeof (unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, +#endif +#ifdef CONFIG_IA64 + { + .ctl_name = KERN_IA64_UNALIGNED, + .procname = "ignore-unaligned-usertrap", + .data = &no_unaligned_warning, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_COMPAT + { + .ctl_name = KERN_COMPAT_LOG, + .procname = "compat-log", + .data = &compat_log, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_RT_MUTEXES + { + .ctl_name = KERN_MAX_LOCK_DEPTH, + .procname = "max_lock_depth", + .data = &max_lock_depth, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif + { .ctl_name = 0 } }; @@ -664,6 +845,14 @@ static ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = VM_PANIC_ON_OOM, + .procname = "panic_on_oom", + .data = &sysctl_panic_on_oom, + .maxlen = sizeof(sysctl_panic_on_oom), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = VM_OVERCOMMIT_RATIO, .procname = "overcommit_ratio", @@ -705,18 +894,18 @@ static ctl_table vm_table[] = { { .ctl_name = VM_DIRTY_WB_CS, .procname = "dirty_writeback_centisecs", - .data = &dirty_writeback_centisecs, - .maxlen = sizeof(dirty_writeback_centisecs), + .data = &dirty_writeback_interval, + .maxlen = sizeof(dirty_writeback_interval), .mode = 0644, .proc_handler = &dirty_writeback_centisecs_handler, }, { .ctl_name = VM_DIRTY_EXPIRE_CS, .procname = "dirty_expire_centisecs", - .data = &dirty_expire_centisecs, - .maxlen = sizeof(dirty_expire_centisecs), + .data = &dirty_expire_interval, + .maxlen = sizeof(dirty_expire_interval), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_userhz_jiffies, }, { .ctl_name = VM_NR_PDFLUSH_THREADS, @@ -758,14 +947,22 @@ static ctl_table vm_table[] = { }, #endif { - .ctl_name = VM_LOWER_ZONE_PROTECTION, - .procname = "lower_zone_protection", - .data = &sysctl_lower_zone_protection, - .maxlen = sizeof(sysctl_lower_zone_protection), + .ctl_name = VM_LOWMEM_RESERVE_RATIO, + .procname = "lowmem_reserve_ratio", + .data = &sysctl_lowmem_reserve_ratio, + .maxlen = sizeof(sysctl_lowmem_reserve_ratio), .mode = 0644, - .proc_handler = &lower_zone_protection_sysctl_handler, + .proc_handler = &lowmem_reserve_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = VM_DROP_PAGECACHE, + .procname = "drop_caches", + .data = &sysctl_drop_caches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = drop_caches_sysctl_handler, .strategy = &sysctl_intvec, - .extra1 = &zero, }, { .ctl_name = VM_MIN_FREE_KBYTES, @@ -777,6 +974,17 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, .extra1 = &zero, }, + { + .ctl_name = VM_PERCPU_PAGELIST_FRACTION, + .procname = "percpu_pagelist_fraction", + .data = &percpu_pagelist_fraction, + .maxlen = sizeof(percpu_pagelist_fraction), + .mode = 0644, + .proc_handler = &percpu_pagelist_fraction_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &min_percpu_pagelist_fract, + }, +#ifdef CONFIG_MMU { .ctl_name = VM_MAX_MAP_COUNT, .procname = "max_map_count", @@ -785,15 +993,15 @@ static ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#endif { .ctl_name = VM_LAPTOP_MODE, .procname = "laptop_mode", .data = &laptop_mode, .maxlen = sizeof(laptop_mode), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, - .extra1 = &zero, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, }, { .ctl_name = VM_BLOCK_DUMP, @@ -805,10 +1013,74 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, .extra1 = &zero, }, - { .ctl_name = 0 } -}; - -static ctl_table proc_table[] = { + { + .ctl_name = VM_VFS_CACHE_PRESSURE, + .procname = "vfs_cache_pressure", + .data = &sysctl_vfs_cache_pressure, + .maxlen = sizeof(sysctl_vfs_cache_pressure), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, +#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT + { + .ctl_name = VM_LEGACY_VA_LAYOUT, + .procname = "legacy_va_layout", + .data = &sysctl_legacy_va_layout, + .maxlen = sizeof(sysctl_legacy_va_layout), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, +#endif +#ifdef CONFIG_NUMA + { + .ctl_name = VM_ZONE_RECLAIM_MODE, + .procname = "zone_reclaim_mode", + .data = &zone_reclaim_mode, + .maxlen = sizeof(zone_reclaim_mode), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + { + .ctl_name = VM_MIN_UNMAPPED, + .procname = "min_unmapped_ratio", + .data = &sysctl_min_unmapped_ratio, + .maxlen = sizeof(sysctl_min_unmapped_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, + { + .ctl_name = VM_MIN_SLAB, + .procname = "min_slab_ratio", + .data = &sysctl_min_slab_ratio, + .maxlen = sizeof(sysctl_min_slab_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, +#endif +#ifdef CONFIG_X86_32 + { + .ctl_name = VM_VDSO_ENABLED, + .procname = "vdso_enabled", + .data = &vdso_enabled, + .maxlen = sizeof(vdso_enabled), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, +#endif { .ctl_name = 0 } }; @@ -835,7 +1107,7 @@ static ctl_table fs_table[] = { .data = &files_stat, .maxlen = 3*sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_nr_files, }, { .ctl_name = FS_MAXFILE, @@ -883,6 +1155,7 @@ static ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_DNOTIFY { .ctl_name = FS_DIR_NOTIFY, .procname = "dir-notify-enable", @@ -891,6 +1164,8 @@ static ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_MMU { .ctl_name = FS_LEASE_TIME, .procname = "lease-break-time", @@ -905,7 +1180,7 @@ static ctl_table fs_table[] = { .data = &aio_nr, .maxlen = sizeof(aio_nr), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, { .ctl_name = FS_AIO_MAX_NR, @@ -913,6 +1188,23 @@ static ctl_table fs_table[] = { .data = &aio_max_nr, .maxlen = sizeof(aio_max_nr), .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, +#ifdef CONFIG_INOTIFY_USER + { + .ctl_name = FS_INOTIFY, + .procname = "inotify", + .mode = 0555, + .child = inotify_table, + }, +#endif +#endif + { + .ctl_name = KERN_SETUID_DUMPABLE, + .procname = "suid_dumpable", + .data = &suid_dumpable, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = 0 } @@ -924,45 +1216,94 @@ static ctl_table debug_table[] = { static ctl_table dev_table[] = { { .ctl_name = 0 } -}; +}; extern void init_irq_proc (void); -void __init sysctl_init(void) +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) { -#ifdef CONFIG_PROC_FS - register_proc_table(root_table, proc_sys_root); - init_irq_proc(); -#endif + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; } -int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) { - struct list_head *tmp; + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} - if (nlen <= 0 || nlen >= CTL_MAXNAME) - return -ENOTDIR; - if (oldval) { - int old_len; +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + +void __init sysctl_init(void) +{ +#ifdef CONFIG_PROC_SYSCTL + register_proc_table(root_table, proc_sys_root, &root_table_header); + init_irq_proc(); +#endif +} + +#ifdef CONFIG_SYSCTL_SYSCALL +int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + struct list_head *tmp; + int error = -ENOTDIR; + + if (nlen <= 0 || nlen >= CTL_MAXNAME) + return -ENOTDIR; + if (oldval) { + int old_len; if (!oldlenp || get_user(old_len, oldlenp)) return -EFAULT; } + spin_lock(&sysctl_lock); tmp = &root_table_header.ctl_entry; do { struct ctl_table_header *head = list_entry(tmp, struct ctl_table_header, ctl_entry); - void *context = NULL; - int error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, head->ctl_table, - &context); - if (context) - kfree(context); + + if (!use_table(head)) + continue; + + spin_unlock(&sysctl_lock); + + error = parse_table(name, nlen, oldval, oldlenp, + newval, newlen, head->ctl_table); + + spin_lock(&sysctl_lock); + unuse_table(head); if (error != -ENOTDIR) - return error; - tmp = tmp->next; - } while (tmp != &root_table_header.ctl_entry); - return -ENOTDIR; + break; + } while ((tmp = tmp->next) != &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); + return error; } asmlinkage long sys_sysctl(struct __sysctl_args __user *args) @@ -979,6 +1320,7 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) unlock_kernel(); return error; } +#endif /* CONFIG_SYSCTL_SYSCALL */ /* * ctl_perm does NOT grant the superuser all rights automatically, because @@ -1005,10 +1347,11 @@ static inline int ctl_perm(ctl_table *table, int op) return test_perm(table->mode, op); } +#ifdef CONFIG_SYSCTL_SYSCALL static int parse_table(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, - ctl_table *table, void **context) + ctl_table *table) { int n; repeat: @@ -1016,7 +1359,9 @@ repeat: return -ENOTDIR; if (get_user(n, name)) return -EFAULT; - for ( ; table->ctl_name; table++) { + for ( ; table->ctl_name || table->procname; table++) { + if (!table->ctl_name) + continue; if (n == table->ctl_name || table->ctl_name == CTL_ANY) { int error; if (table->child) { @@ -1026,7 +1371,7 @@ repeat: error = table->strategy( table, name, nlen, oldval, oldlenp, - newval, newlen, context); + newval, newlen); if (error) return error; } @@ -1037,7 +1382,7 @@ repeat: } error = do_sysctl_strategy(table, name, nlen, oldval, oldlenp, - newval, newlen, context); + newval, newlen); return error; } } @@ -1048,7 +1393,7 @@ repeat: int do_sysctl_strategy (ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { int op = 0, rc; size_t len; @@ -1062,7 +1407,7 @@ int do_sysctl_strategy (ctl_table *table, if (table->strategy) { rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen, context); + newval, newlen); if (rc < 0) return rc; if (rc > 0) @@ -1094,6 +1439,7 @@ int do_sysctl_strategy (ctl_table *table, } return 0; } +#endif /* CONFIG_SYSCTL_SYSCALL */ /** * register_sysctl_table - register a sysctl hierarchy @@ -1173,12 +1519,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, return NULL; tmp->ctl_table = table; INIT_LIST_HEAD(&tmp->ctl_entry); + tmp->used = 0; + tmp->unregistering = NULL; + spin_lock(&sysctl_lock); if (insert_at_head) list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); -#ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); + spin_unlock(&sysctl_lock); +#ifdef CONFIG_PROC_SYSCTL + register_proc_table(table, proc_sys_root, tmp); #endif return tmp; } @@ -1192,27 +1542,43 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, */ void unregister_sysctl_table(struct ctl_table_header * header) { - list_del(&header->ctl_entry); -#ifdef CONFIG_PROC_FS + might_sleep(); + spin_lock(&sysctl_lock); + start_unregistering(header); +#ifdef CONFIG_PROC_SYSCTL unregister_proc_table(header->ctl_table, proc_sys_root); #endif + spin_unlock(&sysctl_lock); kfree(header); } +#else /* !CONFIG_SYSCTL */ +struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head) +{ + return NULL; +} + +void unregister_sysctl_table(struct ctl_table_header * table) +{ +} + +#endif /* CONFIG_SYSCTL */ + /* * /proc/sys support */ -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL /* Scan the sysctl entries in table and add them all into /proc */ -static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) +static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) { struct proc_dir_entry *de; int len; mode_t mode; - for (; table->ctl_name; table++) { + for (; table->ctl_name || table->procname; table++) { /* Can't do anything without a proc name. */ if (!table->procname) continue; @@ -1242,13 +1608,14 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) de = create_proc_entry(table->procname, mode, root); if (!de) continue; + de->set = set; de->data = (void *) table; if (table->proc_handler) de->proc_fops = &proc_sys_file_operations; } table->de = de; if (de->mode & S_IFDIR) - register_proc_table(table->child, de); + register_proc_table(table->child, de, set); } } @@ -1258,7 +1625,7 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) { struct proc_dir_entry *de; - for (; table->ctl_name; table++) { + for (; table->ctl_name || table->procname; table++) { if (!(de = table->de)) continue; if (de->mode & S_IFDIR) { @@ -1273,6 +1640,13 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root continue; } + /* + * In any case, mark the entry as goner; we'll keep it + * around if it's busy, but we'll know to do nothing with + * its fields. We are under sysctl_lock here. + */ + de->data = NULL; + /* Don't unregister proc entries that are still being used.. */ if (atomic_read(&de->count)) continue; @@ -1286,31 +1660,38 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, size_t count, loff_t *ppos) { int op; - struct proc_dir_entry *de; + struct proc_dir_entry *de = PDE(file->f_path.dentry->d_inode); struct ctl_table *table; size_t res; - ssize_t error; - - de = PDE(file->f_dentry->d_inode); - if (!de || !de->data) - return -ENOTDIR; - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - return -ENOTDIR; - op = (write ? 002 : 004); - if (ctl_perm(table, op)) - return -EPERM; + ssize_t error = -ENOTDIR; - res = count; - - /* - * FIXME: we need to pass on ppos to the handler. - */ - - error = (*table->proc_handler) (table, write, file, buf, &res); - if (error) - return error; - return res; + spin_lock(&sysctl_lock); + if (de && de->data && use_table(de->set)) { + /* + * at that point we know that sysctl was not unregistered + * and won't be until we finish + */ + spin_unlock(&sysctl_lock); + table = (struct ctl_table *) de->data; + if (!table || !table->proc_handler) + goto out; + error = -EPERM; + op = (write ? 002 : 004); + if (ctl_perm(table, op)) + goto out; + + /* careful: calling conventions are nasty here */ + res = count; + error = (*table->proc_handler)(table, write, file, + buf, &res, ppos); + if (!error) + error = res; + out: + spin_lock(&sysctl_lock); + unuse_table(de->set); + } + spin_unlock(&sysctl_lock); + return error; } static int proc_opensys(struct inode *inode, struct file *file) @@ -1339,32 +1720,16 @@ static ssize_t proc_writesys(struct file * file, const char __user * buf, return do_rw_proc(1, file, (char __user *) buf, count, ppos); } -/** - * proc_dostring - read a string sysctl - * @table: the sysctl table - * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure - * @buffer: the user buffer - * @lenp: the size of the user buffer - * - * Reads/writes a string from/to the user buffer. If the kernel - * buffer provided is not large enough to hold the string, the - * string is truncated. The copied string is %NULL-terminated. - * If the string is being read by the user process, it is copied - * and a newline '\n' is added. It is truncated if the buffer is - * not large enough. - * - * Returns 0 on success. - */ -int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) +static int _proc_do_string(void* data, int maxlen, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) { size_t len; char __user *p; char c; - if (!table->data || !table->maxlen || !*lenp || - (filp->f_pos && !write)) { + if (!data || !maxlen || !*lenp || + (*ppos && !write)) { *lenp = 0; return 0; } @@ -1379,20 +1744,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, break; len++; } - if (len >= table->maxlen) - len = table->maxlen-1; - if(copy_from_user(table->data, buffer, len)) + if (len >= maxlen) + len = maxlen-1; + if(copy_from_user(data, buffer, len)) return -EFAULT; - ((char *) table->data)[len] = 0; - filp->f_pos += *lenp; + ((char *) data)[len] = 0; + *ppos += *lenp; } else { - len = strlen(table->data); - if (len > table->maxlen) - len = table->maxlen; + len = strlen(data); + if (len > maxlen) + len = maxlen; if (len > *lenp) len = *lenp; if (len) - if(copy_to_user(buffer, table->data, len)) + if(copy_to_user(buffer, data, len)) return -EFAULT; if (len < *lenp) { if(put_user('\n', ((char __user *) buffer) + len)) @@ -1400,30 +1765,49 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, len++; } *lenp = len; - filp->f_pos += len; + *ppos += len; } return 0; } +/** + * proc_dostring - read a string sysctl + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * Reads/writes a string from/to the user buffer. If the kernel + * buffer provided is not large enough to hold the string, the + * string is truncated. The copied string is %NULL-terminated. + * If the string is being read by the user process, it is copied + * and a newline '\n' is added. It is truncated if the buffer is + * not large enough. + * + * Returns 0 on success. + */ +int proc_dostring(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return _proc_do_string(table->data, table->maxlen, write, filp, + buffer, lenp, ppos); +} + /* * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? */ - -static int proc_doutsstring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + +static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int r; - - if (!write) { - down_read(&uts_sem); - r=proc_dostring(table,0,filp,buffer,lenp); - up_read(&uts_sem); - } else { - down_write(&uts_sem); - r=proc_dostring(table,1,filp,buffer,lenp); - up_write(&uts_sem); - } + void *which; + which = get_uts(table, write); + r = _proc_do_string(which, table->maxlen,write,filp,buffer,lenp, ppos); + put_uts(table, write, which); return r; } @@ -1446,13 +1830,14 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, return 0; } -static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, +static int __do_proc_dointvec(void *tbl_data, ctl_table *table, + int write, struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { -#define TMPBUFLEN 20 +#define TMPBUFLEN 21 int *i, vleft, first=1, neg, val; unsigned long lval; size_t left, len; @@ -1460,13 +1845,13 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, char buf[TMPBUFLEN], *p; char __user *s = buffer; - if (!table->data || !table->maxlen || !*lenp || - (filp->f_pos && !write)) { + if (!tbl_data || !table->maxlen || !*lenp || + (*ppos && !write)) { *lenp = 0; return 0; } - i = (int *) table->data; + i = (int *) tbl_data; vleft = table->maxlen / sizeof(*i); left = *lenp; @@ -1496,7 +1881,7 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, p = buf; if (*p == '-' && left > 1) { neg = 1; - left--, p++; + p++; } if (*p < '0' || *p > '9') break; @@ -1550,11 +1935,21 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, if (write && first) return -EINVAL; *lenp -= left; - filp->f_pos += *lenp; + *ppos += *lenp; return 0; #undef TMPBUFLEN } +static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos, + int (*conv)(int *negp, unsigned long *lvalp, int *valp, + int write, void *data), + void *data) +{ + return __do_proc_dointvec(table->data, table, write, filp, + buffer, lenp, ppos, conv, data); +} + /** * proc_dointvec - read a vector of integers * @table: the sysctl table @@ -1562,6 +1957,7 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. @@ -1569,17 +1965,14 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, * Returns 0 on success. */ int proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp, + return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, NULL,NULL); } #define OP_SET 0 #define OP_AND 1 -#define OP_OR 2 -#define OP_MAX 3 -#define OP_MIN 4 static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, int *valp, @@ -1591,13 +1984,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, switch(op) { case OP_SET: *valp = val; break; case OP_AND: *valp &= val; break; - case OP_OR: *valp |= val; break; - case OP_MAX: if(*valp < val) - *valp = val; - break; - case OP_MIN: if(*valp > val) - *valp = val; - break; } } else { int val = *valp; @@ -1617,7 +2003,7 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, */ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { int op; @@ -1625,8 +2011,8 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, return -EPERM; } - op = (current->pid == 1) ? OP_SET : OP_AND; - return do_proc_dointvec(table,write,filp,buffer,lenp, + op = is_init(current) ? OP_SET : OP_AND; + return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, do_proc_dointvec_bset_conv,&op); } @@ -1666,6 +2052,7 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. @@ -1676,36 +2063,37 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * Returns 0 on success. */ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { .min = (int *) table->extra1, .max = (int *) table->extra2, }; - return do_proc_dointvec(table, write, filp, buffer, lenp, + return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, do_proc_dointvec_minmax_conv, ¶m); } -static int do_proc_doulongvec_minmax(ctl_table *table, int write, +static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, + void __user *buffer, + size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { -#define TMPBUFLEN 20 +#define TMPBUFLEN 21 unsigned long *i, *min, *max, val; int vleft, first=1, neg; size_t len, left; char buf[TMPBUFLEN], *p; char __user *s = buffer; - if (!table->data || !table->maxlen || !*lenp || - (filp->f_pos && !write)) { + if (!data || !table->maxlen || !*lenp || + (*ppos && !write)) { *lenp = 0; return 0; } - i = (unsigned long *) table->data; + i = (unsigned long *) data; min = (unsigned long *) table->extra1; max = (unsigned long *) table->extra2; vleft = table->maxlen / sizeof(unsigned long); @@ -1734,7 +2122,7 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, p = buf; if (*p == '-' && left > 1) { neg = 1; - left--, p++; + p++; } if (*p < '0' || *p > '9') break; @@ -1785,11 +2173,22 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, if (write && first) return -EINVAL; *lenp -= left; - filp->f_pos += *lenp; + *ppos += *lenp; return 0; #undef TMPBUFLEN } +static int do_proc_doulongvec_minmax(ctl_table *table, int write, + struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos, + unsigned long convmul, + unsigned long convdiv) +{ + return __do_proc_doulongvec_minmax(table->data, table, write, + filp, buffer, lenp, ppos, convmul, convdiv); +} + /** * proc_doulongvec_minmax - read a vector of long integers with min/max values * @table: the sysctl table @@ -1797,6 +2196,7 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: file position * * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long * values from/to the user buffer, treated as an ASCII string. @@ -1807,9 +2207,9 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, * Returns 0 on success. */ int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l); + return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); } /** @@ -1819,6 +2219,7 @@ int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: file position * * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long * values from/to the user buffer, treated as an ASCII string. The values @@ -1831,10 +2232,11 @@ int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, */ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, + size_t *lenp, loff_t *ppos) { return do_proc_doulongvec_minmax(table, write, filp, buffer, - lenp, HZ, 1000l); + lenp, ppos, HZ, 1000l); } @@ -1843,6 +2245,8 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, int write, void *data) { if (write) { + if (*lvalp > LONG_MAX / HZ) + return 1; *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); } else { int val = *valp; @@ -1864,6 +2268,8 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, int write, void *data) { if (write) { + if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ) + return 1; *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp); } else { int val = *valp; @@ -1880,6 +2286,27 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, return 0; } +static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, + int *valp, + int write, void *data) +{ + if (write) { + *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp); + } else { + int val = *valp; + unsigned long lval; + if (val < 0) { + *negp = -1; + lval = (unsigned long)-val; + } else { + *negp = 0; + lval = (unsigned long)val; + } + *lvalp = jiffies_to_msecs(lval); + } + return 0; +} + /** * proc_dointvec_jiffies - read a vector of integers as seconds * @table: the sysctl table @@ -1887,6 +2314,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. @@ -1896,9 +2324,9 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, * Returns 0 on success. */ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp, + return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, do_proc_dointvec_jiffies_conv,NULL); } @@ -1909,6 +2337,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: pointer to the file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. @@ -1918,65 +2347,158 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * Returns 0 on success. */ int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp, + return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, do_proc_dointvec_userhz_jiffies_conv,NULL); } +/** + * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * @ppos: the current position in the file + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and + * are converted into jiffies. + * + * Returns 0 on success. + */ +int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, + do_proc_dointvec_ms_jiffies_conv, NULL); +} + +#ifdef CONFIG_SYSVIPC +static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + void *which; + which = get_ipc(table, write); + return __do_proc_dointvec(which, table, write, filp, buffer, + lenp, ppos, NULL, NULL); +} + +static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + void *which; + which = get_ipc(table, write); + return __do_proc_doulongvec_minmax(which, table, write, filp, buffer, + lenp, ppos, 1l, 1l); +} + +#endif + +static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct pid *new_pid; + pid_t tmp; + int r; + + tmp = pid_nr(cad_pid); + + r = __do_proc_dointvec(&tmp, table, write, filp, buffer, + lenp, ppos, NULL, NULL); + if (r || !write) + return r; + + new_pid = find_get_pid(tmp); + if (!new_pid) + return -ESRCH; + + put_pid(xchg(&cad_pid, new_pid)); + return 0; +} + #else /* CONFIG_PROC_FS */ int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + +static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -static int proc_doutsstring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) +#ifdef CONFIG_SYSVIPC +static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} +static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } +static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} +#endif int proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + +int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) + void __user *buffer, + size_t *lenp, loff_t *ppos) { return -ENOSYS; } @@ -1985,6 +2507,7 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_SYSCTL_SYSCALL /* * General sysctl support routines */ @@ -1992,31 +2515,34 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, /* The generic string strategy routine: */ int sysctl_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { - size_t l, len; - if (!table->data || !table->maxlen) return -ENOTDIR; if (oldval && oldlenp) { - if (get_user(len, oldlenp)) + size_t bufsize; + if (get_user(bufsize, oldlenp)) return -EFAULT; - if (len) { - l = strlen(table->data); - if (len > l) len = l; - if (len >= table->maxlen) + if (bufsize) { + size_t len = strlen(table->data), copied; + + /* This shouldn't trigger for a well-formed sysctl */ + if (len > table->maxlen) len = table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(0, ((char __user *) oldval) + len)) + + /* Copy up to a max of bufsize-1 bytes of the string */ + copied = (len >= bufsize) ? bufsize - 1 : len; + + if (copy_to_user(oldval, table->data, copied) || + put_user(0, (char __user *)(oldval + copied))) return -EFAULT; - if(put_user(len, oldlenp)) + if (put_user(len, oldlenp)) return -EFAULT; } } if (newval && newlen) { - len = newlen; + size_t len = newlen; if (len > table->maxlen) len = table->maxlen; if(copy_from_user(table->data, newval, len)) @@ -2025,7 +2551,7 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen, len--; ((char *) table->data)[len] = 0; } - return 0; + return 1; } /* @@ -2035,7 +2561,7 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen, */ int sysctl_intvec(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (newval && newlen) { @@ -2071,7 +2597,7 @@ int sysctl_intvec(ctl_table *table, int __user *name, int nlen, /* Strategy function to convert jiffies to seconds */ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (oldval) { size_t olen; @@ -2096,96 +2622,169 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, return 1; } - -#else /* CONFIG_SYSCTL */ - - -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +/* Strategy function to convert jiffies to seconds */ +int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { - return -ENOSYS; + if (oldval) { + size_t olen; + if (oldlenp) { + if (get_user(olen, oldlenp)) + return -EFAULT; + if (olen!=sizeof(int)) + return -EINVAL; + } + if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) || + (oldlenp && put_user(sizeof(int),oldlenp))) + return -EFAULT; + } + if (newval && newlen) { + int new; + if (newlen != sizeof(int)) + return -EINVAL; + if (get_user(new, (int __user *)newval)) + return -EFAULT; + *(int *)(table->data) = msecs_to_jiffies(new); + } + return 1; } -int sysctl_string(ctl_table *table, int __user *name, int nlen, + +/* The generic string strategy routine: */ +static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { - return -ENOSYS; + struct ctl_table uts_table; + int r, write; + write = newval && newlen; + memcpy(&uts_table, table, sizeof(uts_table)); + uts_table.data = get_uts(table, write); + r = sysctl_string(&uts_table, name, nlen, + oldval, oldlenp, newval, newlen); + put_uts(table, write, uts_table.data); + return r; } -int sysctl_intvec(ctl_table *table, int __user *name, int nlen, +#ifdef CONFIG_SYSVIPC +/* The generic sysctl ipc data routine. */ +static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { - return -ENOSYS; -} + size_t len; + void *data; -int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) -{ - return -ENOSYS; -} + /* Get out of I don't have a variable */ + if (!table->data || !table->maxlen) + return -ENOTDIR; -int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) -{ - return -ENOSYS; -} + data = get_ipc(table, 1); + if (!data) + return -ENOTDIR; -int proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) -{ - return -ENOSYS; -} + if (oldval && oldlenp) { + if (get_user(len, oldlenp)) + return -EFAULT; + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if (copy_to_user(oldval, data, len)) + return -EFAULT; + if (put_user(len, oldlenp)) + return -EFAULT; + } + } -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) -{ - return -ENOSYS; + if (newval && newlen) { + if (newlen > table->maxlen) + newlen = table->maxlen; + + if (copy_from_user(data, newval, newlen)) + return -EFAULT; + } + return 1; } +#endif -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) +#else /* CONFIG_SYSCTL_SYSCALL */ + + +asmlinkage long sys_sysctl(struct __sysctl_args __user *args) { + static int msg_count; + struct __sysctl_args tmp; + int name[CTL_MAXNAME]; + int i; + + /* Read in the sysctl name for better debug message logging */ + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME) + return -ENOTDIR; + for (i = 0; i < tmp.nlen; i++) + if (get_user(name[i], tmp.name + i)) + return -EFAULT; + + /* Ignore accesses to kernel.version */ + if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION)) + goto out; + + if (msg_count < 5) { + msg_count++; + printk(KERN_INFO + "warning: process `%s' used the removed sysctl " + "system call with ", current->comm); + for (i = 0; i < tmp.nlen; i++) + printk("%d.", name[i]); + printk("\n"); + } +out: return -ENOSYS; } -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) +int sysctl_string(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) +int sysctl_intvec(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp) +int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, - struct file *filp, - void __user *buffer, size_t *lenp) +int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { - return -ENOSYS; + return -ENOSYS; } -struct ctl_table_header * register_sysctl_table(ctl_table * table, - int insert_at_head) +static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { - return 0; + return -ENOSYS; } - -void unregister_sysctl_table(struct ctl_table_header * table) +static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { + return -ENOSYS; } - -#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_SYSCTL_SYSCALL */ /* * No sense putting this after each symbol definition, twice, @@ -2195,11 +2794,13 @@ EXPORT_SYMBOL(proc_dointvec); EXPORT_SYMBOL(proc_dointvec_jiffies); EXPORT_SYMBOL(proc_dointvec_minmax); EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); +EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); EXPORT_SYMBOL(register_sysctl_table); EXPORT_SYMBOL(sysctl_intvec); EXPORT_SYMBOL(sysctl_jiffies); +EXPORT_SYMBOL(sysctl_ms_jiffies); EXPORT_SYMBOL(sysctl_string); EXPORT_SYMBOL(unregister_sysctl_table);