From fd2d81bb2830cfc236fab5a7a2ea176a771b1d91 Mon Sep 17 00:00:00 2001 From: Andy Bavier Date: Fri, 20 Jun 2008 15:27:53 +0000 Subject: [PATCH] Removed .orig files --- linux-2.6-520-vnet+.patch | 437 - linux-2.6-700-trellis-mm1-netns.patch | 14720 +----------------------- 2 files changed, 268 insertions(+), 14889 deletions(-) diff --git a/linux-2.6-520-vnet+.patch b/linux-2.6-520-vnet+.patch index 04b0719dc..e57d417ae 100644 --- a/linux-2.6-520-vnet+.patch +++ b/linux-2.6-520-vnet+.patch @@ -90,154 +90,6 @@ diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h linux-2.6.22-520/inc /* address types */ -diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h.orig.orig linux-2.6.22-520/include/linux/vserver/network.h.orig.orig ---- linux-2.6.22-510/include/linux/vserver/network.h.orig.orig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.22-520/include/linux/vserver/network.h.orig.orig 2008-03-20 01:27:27.000000000 -0400 -@@ -0,0 +1,143 @@ -+#ifndef _VX_NETWORK_H -+#define _VX_NETWORK_H -+ -+#include -+ -+ -+#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ -+ -+ -+/* network flags */ -+ -+#define NXF_INFO_PRIVATE 0x00000008 -+ -+#define NXF_SINGLE_IP 0x00000100 -+#define NXF_LBACK_REMAP 0x00000200 -+ -+#define NXF_HIDE_NETIF 0x02000000 -+#define NXF_HIDE_LBACK 0x04000000 -+ -+#define NXF_STATE_SETUP (1ULL << 32) -+#define NXF_STATE_ADMIN (1ULL << 34) -+ -+#define NXF_SC_HELPER (1ULL << 36) -+#define NXF_PERSISTENT (1ULL << 38) -+ -+#define NXF_ONE_TIME (0x0005ULL << 32) -+ -+ -+#define NXF_INIT_SET (__nxf_init_set()) -+ -+static inline uint64_t __nxf_init_set(void) { -+ return NXF_STATE_ADMIN -+#ifdef CONFIG_VSERVER_AUTO_LBACK -+ | NXF_LBACK_REMAP -+ | NXF_HIDE_LBACK -+#endif -+#ifdef CONFIG_VSERVER_AUTO_SINGLE -+ | NXF_SINGLE_IP -+#endif -+ | NXF_HIDE_NETIF; -+} -+ -+ -+/* network caps */ -+ -+#define NXC_RAW_ICMP 0x00000100 -+ -+ -+/* address types */ -+ -+#define NXA_TYPE_IPV4 0x0001 -+#define NXA_TYPE_IPV6 0x0002 -+ -+#define NXA_TYPE_NONE 0x0000 -+#define NXA_TYPE_ANY 0x00FF -+ -+#define NXA_TYPE_ADDR 0x0010 -+#define NXA_TYPE_MASK 0x0020 -+#define NXA_TYPE_RANGE 0x0040 -+ -+#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE) -+ -+#define NXA_MOD_BCAST 0x0100 -+#define NXA_MOD_LBACK 0x0200 -+ -+#define NXA_LOOPBACK 0x1000 -+ -+#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK) -+#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK) -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+struct nx_addr_v4 { -+ struct nx_addr_v4 *next; -+ struct in_addr ip[2]; -+ struct in_addr mask; -+ uint16_t type; -+ uint16_t flags; -+}; -+ -+struct nx_addr_v6 { -+ struct nx_addr_v6 *next; -+ struct in6_addr ip; -+ struct in6_addr mask; -+ uint32_t prefix; -+ uint16_t type; -+ uint16_t flags; -+}; -+ -+struct nx_info { -+ struct hlist_node nx_hlist; /* linked list of nxinfos */ -+ nid_t nx_id; /* vnet id */ -+ atomic_t nx_usecnt; /* usage count */ -+ atomic_t nx_tasks; /* tasks count */ -+ int nx_state; /* context state */ -+ -+ uint64_t nx_flags; /* network flag word */ -+ uint64_t nx_ncaps; /* network capabilities */ -+ -+ struct in_addr v4_lback; /* Loopback address */ -+ struct in_addr v4_bcast; /* Broadcast address */ -+ struct nx_addr_v4 v4; /* First/Single ipv4 address */ -+#ifdef CONFIG_IPV6 -+ struct nx_addr_v6 v6; /* First/Single ipv6 address */ -+#endif -+ char nx_name[65]; /* network context name */ -+}; -+ -+ -+/* status flags */ -+ -+#define NXS_HASHED 0x0001 -+#define NXS_SHUTDOWN 0x0100 -+#define NXS_RELEASED 0x8000 -+ -+extern struct nx_info *lookup_nx_info(int); -+ -+extern int get_nid_list(int, unsigned int *, int); -+extern int nid_is_hashed(nid_t); -+ -+extern int nx_migrate_task(struct task_struct *, struct nx_info *); -+ -+extern long vs_net_change(struct nx_info *, unsigned int); -+ -+struct sock; -+ -+ -+#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE) -+#ifdef CONFIG_IPV6 -+#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE) -+#else -+#define NX_IPV6(n) (0) -+#endif -+ -+#endif /* __KERNEL__ */ -+#endif /* _VX_NETWORK_H */ -diff -Nurb linux-2.6.22-510/include/net/netfilter/nf_conntrack.h linux-2.6.22-520/include/net/netfilter/nf_conntrack.h --- linux-2.6.22-510/include/net/netfilter/nf_conntrack.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-520/include/net/netfilter/nf_conntrack.h 2008-03-20 01:27:27.000000000 -0400 @@ -131,6 +131,9 @@ @@ -645,295 +497,6 @@ diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c linux-2.6.22-520/net/netfilt printk(KERN_WARNING "MARK: unknown mode %u\n", markinfo->mode); return 0; -diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c.orig linux-2.6.22-520/net/netfilter/xt_MARK.c.orig ---- linux-2.6.22-510/net/netfilter/xt_MARK.c.orig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.22-520/net/netfilter/xt_MARK.c.orig 2008-03-20 01:27:27.000000000 -0400 -@@ -0,0 +1,284 @@ -+/* This is a module which is used for setting the NFMARK field of an skb. */ -+ -+/* (C) 1999-2001 Marc Boucher -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Marc Boucher "); -+MODULE_DESCRIPTION("ip[6]tables MARK modification module"); -+MODULE_ALIAS("ipt_MARK"); -+MODULE_ALIAS("ip6t_MARK"); -+ -+static inline u_int16_t -+get_dst_port(struct nf_conntrack_tuple *tuple) -+{ -+ switch (tuple->dst.protonum) { -+ case IPPROTO_GRE: -+ /* XXX Truncate 32-bit GRE key to 16 bits */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11) -+ return tuple->dst.u.gre.key; -+#else -+ return htons(ntohl(tuple->dst.u.gre.key)); -+#endif -+ case IPPROTO_ICMP: -+ /* Bind on ICMP echo ID */ -+ return tuple->src.u.icmp.id; -+ case IPPROTO_TCP: -+ return tuple->dst.u.tcp.port; -+ case IPPROTO_UDP: -+ return tuple->dst.u.udp.port; -+ default: -+ return tuple->dst.u.all; -+ } -+} -+ -+static inline u_int16_t -+get_src_port(struct nf_conntrack_tuple *tuple) -+{ -+ switch (tuple->dst.protonum) { -+ case IPPROTO_GRE: -+ /* XXX Truncate 32-bit GRE key to 16 bits */ -+ return htons(ntohl(tuple->src.u.gre.key)); -+ case IPPROTO_ICMP: -+ /* Bind on ICMP echo ID */ -+ return tuple->src.u.icmp.id; -+ case IPPROTO_TCP: -+ return tuple->src.u.tcp.port; -+ case IPPROTO_UDP: -+ return tuple->src.u.udp.port; -+ default: -+ return tuple->src.u.all; -+ } -+} -+ -+static unsigned int -+target_v0(struct sk_buff **pskb, -+ const struct net_device *in, -+ const struct net_device *out, -+ unsigned int hooknum, -+ const struct xt_target *target, -+ const void *targinfo) -+{ -+ const struct xt_mark_target_info *markinfo = targinfo; -+ -+ (*pskb)->mark = markinfo->mark; -+ return XT_CONTINUE; -+} -+ -+static unsigned int -+target_v1(struct sk_buff **pskb, -+ const struct net_device *in, -+ const struct net_device *out, -+ unsigned int hooknum, -+ const struct xt_target *target, -+ const void *targinfo) -+{ -+ const struct xt_mark_target_info_v1 *markinfo = targinfo; -+ int mark = -1; -+ -+ switch (markinfo->mode) { -+ case XT_MARK_SET: -+ mark = markinfo->mark; -+ break; -+ -+ case XT_MARK_AND: -+ mark = (*pskb)->mark & markinfo->mark; -+ break; -+ -+ case XT_MARK_OR: -+ mark = (*pskb)->mark | markinfo->mark; -+ break; -+ -+ case XT_MARK_COPYXID: { -+ enum ip_conntrack_info ctinfo; -+ struct sock *connection_sk=NULL; -+ int dif; -+ -+ struct nf_conn *ct = nf_ct_get((*pskb), &ctinfo); -+ extern struct inet_hashinfo tcp_hashinfo; -+ enum ip_conntrack_dir dir; -+ if (!ct) -+ break; -+ -+ dir = CTINFO2DIR(ctinfo); -+ u_int32_t src_ip = ct->tuplehash[dir].tuple.src.u3.ip; -+ u_int16_t src_port = get_src_port(&ct->tuplehash[dir].tuple); -+ u_int16_t proto = ct->tuplehash[dir].tuple.dst.protonum; -+ -+ u_int32_t ip; -+ u_int16_t port; -+ -+ dif = ((struct rtable *)(*pskb)->dst)->rt_iif; -+ ip = ct->tuplehash[dir].tuple.dst.u3.ip; -+ port = get_dst_port(&ct->tuplehash[dir].tuple); -+ -+ if (proto == 1) { -+ if (((*pskb)->mark!=-1) && (*pskb)->mark) -+ ct->xid[0]=(*pskb)->mark; -+ if (ct->xid[0]) -+ mark = ct->xid[0]; -+ printk(KERN_CRIT "%d %d\n",ct->xid[0],(*pskb)->mark); -+ -+ } -+ else if (proto == 6) { -+ if ((*pskb)->sk) -+ connection_sk = (*pskb)->sk; -+ else { -+ connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif); -+ } -+ -+ if (connection_sk) { -+ connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir]; -+ ct->xid[!dir]=connection_sk->sk_xid; -+ if (connection_sk->sk_xid != 0) -+ mark = connection_sk->sk_xid; -+ if (connection_sk != (*pskb)->sk) -+ sock_put(connection_sk); -+ } -+ break; -+ } -+ } -+ } -+ -+ if (mark != -1) -+ (*pskb)->mark = mark; -+ return XT_CONTINUE; -+} -+ -+ -+static int -+checkentry_v0(const char *tablename, -+ const void *entry, -+ const struct xt_target *target, -+ void *targinfo, -+ unsigned int hook_mask) -+{ -+ struct xt_mark_target_info *markinfo = targinfo; -+ -+ if (markinfo->mark > 0xffffffff) { -+ printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); -+ return 0; -+ } -+ return 1; -+} -+ -+static int -+checkentry_v1(const char *tablename, -+ const void *entry, -+ const struct xt_target *target, -+ void *targinfo, -+ unsigned int hook_mask) -+{ -+ struct xt_mark_target_info_v1 *markinfo = targinfo; -+ -+ if (markinfo->mode != XT_MARK_SET -+ && markinfo->mode != XT_MARK_AND -+ && markinfo->mode != XT_MARK_OR -+ && markinfo->mode != XT_MARK_COPYXID) { -+ printk(KERN_WARNING "MARK: unknown mode %u\n", -+ markinfo->mode); -+ return 0; -+ } -+ if (markinfo->mark > 0xffffffff) { -+ printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); -+ return 0; -+ } -+ return 1; -+} -+ -+#ifdef CONFIG_COMPAT -+struct compat_xt_mark_target_info_v1 { -+ compat_ulong_t mark; -+ u_int8_t mode; -+ u_int8_t __pad1; -+ u_int16_t __pad2; -+}; -+ -+static void compat_from_user_v1(void *dst, void *src) -+{ -+ struct compat_xt_mark_target_info_v1 *cm = src; -+ struct xt_mark_target_info_v1 m = { -+ .mark = cm->mark, -+ .mode = cm->mode, -+ }; -+ memcpy(dst, &m, sizeof(m)); -+} -+ -+static int compat_to_user_v1(void __user *dst, void *src) -+{ -+ struct xt_mark_target_info_v1 *m = src; -+ struct compat_xt_mark_target_info_v1 cm = { -+ .mark = m->mark, -+ .mode = m->mode, -+ }; -+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -+} -+#endif /* CONFIG_COMPAT */ -+ -+static struct xt_target xt_mark_target[] = { -+ { -+ .name = "MARK", -+ .family = AF_INET, -+ .revision = 0, -+ .checkentry = checkentry_v0, -+ .target = target_v0, -+ .targetsize = sizeof(struct xt_mark_target_info), -+ .table = "mangle", -+ .me = THIS_MODULE, -+ }, -+ { -+ .name = "MARK", -+ .family = AF_INET, -+ .revision = 1, -+ .checkentry = checkentry_v1, -+ .target = target_v1, -+ .targetsize = sizeof(struct xt_mark_target_info_v1), -+#ifdef CONFIG_COMPAT -+ .compatsize = sizeof(struct compat_xt_mark_target_info_v1), -+ .compat_from_user = compat_from_user_v1, -+ .compat_to_user = compat_to_user_v1, -+#endif -+ .table = "mangle", -+ .me = THIS_MODULE, -+ }, -+ { -+ .name = "MARK", -+ .family = AF_INET6, -+ .revision = 0, -+ .checkentry = checkentry_v0, -+ .target = target_v0, -+ .targetsize = sizeof(struct xt_mark_target_info), -+ .table = "mangle", -+ .me = THIS_MODULE, -+ }, -+}; -+ -+static int __init xt_mark_init(void) -+{ -+ return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); -+} -+ -+static void __exit xt_mark_fini(void) -+{ -+ xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); -+} -+ -+module_init(xt_mark_init); -+module_exit(xt_mark_fini); -diff -Nurb linux-2.6.22-510/net/netfilter/xt_SETXID.c linux-2.6.22-520/net/netfilter/xt_SETXID.c --- linux-2.6.22-510/net/netfilter/xt_SETXID.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-520/net/netfilter/xt_SETXID.c 2008-03-20 01:27:27.000000000 -0400 @@ -0,0 +1,79 @@ diff --git a/linux-2.6-700-trellis-mm1-netns.patch b/linux-2.6-700-trellis-mm1-netns.patch index d83f37350..e044429be 100644 --- a/linux-2.6-700-trellis-mm1-netns.patch +++ b/linux-2.6-700-trellis-mm1-netns.patch @@ -2661,1503 +2661,6 @@ diff -Nurb linux-2.6.22-570/Makefile linux-2.6.22-590/Makefile ifdef CONFIG_DEBUG_INFO CFLAGS += -g endif -diff -Nurb linux-2.6.22-570/Makefile.orig linux-2.6.22-590/Makefile.orig ---- linux-2.6.22-570/Makefile.orig 2008-03-20 13:25:40.000000000 -0400 -+++ linux-2.6.22-590/Makefile.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,1493 +0,0 @@ --VERSION = 2 --PATCHLEVEL = 6 --SUBLEVEL = 22 --EXTRAVERSION = .14 --NAME = Holy Dancing Manatees, Batman! -- --# *DOCUMENTATION* --# To see a list of typical targets execute "make help" --# More info can be located in ./README --# Comments in this file are targeted only to the developer, do not --# expect to learn how to build the kernel reading this file. -- --# Do not: --# o use make's built-in rules and variables --# (this increases performance and avoid hard-to-debug behavour); --# o print "Entering directory ..."; --MAKEFLAGS += -rR --no-print-directory -- --# We are using a recursive build, so we need to do a little thinking --# to get the ordering right. --# --# Most importantly: sub-Makefiles should only ever modify files in --# their own directory. If in some directory we have a dependency on --# a file in another dir (which doesn't happen often, but it's often --# unavoidable when linking the built-in.o targets which finally --# turn into vmlinux), we will call a sub make in that other dir, and --# after that we are sure that everything which is in that other dir --# is now up to date. --# --# The only cases where we need to modify files which have global --# effects are thus separated out and done before the recursive --# descending is started. They are now explicitly listed as the --# prepare rule. -- --# To put more focus on warnings, be less verbose as default --# Use 'make V=1' to see the full commands -- --ifdef V -- ifeq ("$(origin V)", "command line") -- KBUILD_VERBOSE = $(V) -- endif --endif --ifndef KBUILD_VERBOSE -- KBUILD_VERBOSE = 0 --endif -- --# Call a source code checker (by default, "sparse") as part of the --# C compilation. --# --# Use 'make C=1' to enable checking of only re-compiled files. --# Use 'make C=2' to enable checking of *all* source files, regardless --# of whether they are re-compiled or not. --# --# See the file "Documentation/sparse.txt" for more details, including --# where to get the "sparse" utility. -- --ifdef C -- ifeq ("$(origin C)", "command line") -- KBUILD_CHECKSRC = $(C) -- endif --endif --ifndef KBUILD_CHECKSRC -- KBUILD_CHECKSRC = 0 --endif -- --# Use make M=dir to specify directory of external module to build --# Old syntax make ... SUBDIRS=$PWD is still supported --# Setting the environment variable KBUILD_EXTMOD take precedence --ifdef SUBDIRS -- KBUILD_EXTMOD ?= $(SUBDIRS) --endif --ifdef M -- ifeq ("$(origin M)", "command line") -- KBUILD_EXTMOD := $(M) -- endif --endif -- -- --# kbuild supports saving output files in a separate directory. --# To locate output files in a separate directory two syntaxes are supported. --# In both cases the working directory must be the root of the kernel src. --# 1) O= --# Use "make O=dir/to/store/output/files/" --# --# 2) Set KBUILD_OUTPUT --# Set the environment variable KBUILD_OUTPUT to point to the directory --# where the output files shall be placed. --# export KBUILD_OUTPUT=dir/to/store/output/files/ --# make --# --# The O= assignment takes precedence over the KBUILD_OUTPUT environment --# variable. -- -- --# KBUILD_SRC is set on invocation of make in OBJ directory --# KBUILD_SRC is not intended to be used by the regular user (for now) --ifeq ($(KBUILD_SRC),) -- --# OK, Make called in directory where kernel src resides --# Do we want to locate output files in a separate directory? --ifdef O -- ifeq ("$(origin O)", "command line") -- KBUILD_OUTPUT := $(O) -- endif --endif -- --# That's our default target when none is given on the command line --PHONY := _all --_all: -- --ifneq ($(KBUILD_OUTPUT),) --# Invoke a second make in the output directory, passing relevant variables --# check that the output directory actually exists --saved-output := $(KBUILD_OUTPUT) --KBUILD_OUTPUT := $(shell cd $(KBUILD_OUTPUT) && /bin/pwd) --$(if $(KBUILD_OUTPUT),, \ -- $(error output directory "$(saved-output)" does not exist)) -- --PHONY += $(MAKECMDGOALS) -- --$(filter-out _all,$(MAKECMDGOALS)) _all: -- $(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \ -- KBUILD_SRC=$(CURDIR) \ -- KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile $@ -- --# Leave processing to above invocation of make --skip-makefile := 1 --endif # ifneq ($(KBUILD_OUTPUT),) --endif # ifeq ($(KBUILD_SRC),) -- --# We process the rest of the Makefile if this is the final invocation of make --ifeq ($(skip-makefile),) -- --# If building an external module we do not care about the all: rule --# but instead _all depend on modules --PHONY += all --ifeq ($(KBUILD_EXTMOD),) --_all: all --else --_all: modules --endif -- --srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR)) --TOPDIR := $(srctree) --# FIXME - TOPDIR is obsolete, use srctree/objtree --objtree := $(CURDIR) --src := $(srctree) --obj := $(objtree) -- --VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD)) -- --export srctree objtree VPATH TOPDIR -- -- --# SUBARCH tells the usermode build what the underlying arch is. That is set --# first, and if a usermode build is happening, the "ARCH=um" on the command --# line overrides the setting of ARCH below. If a native build is happening, --# then ARCH is assigned, getting whatever value it gets normally, and --# SUBARCH is subsequently ignored. -- --SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -- -e s/arm.*/arm/ -e s/sa110/arm/ \ -- -e s/s390x/s390/ -e s/parisc64/parisc/ \ -- -e s/ppc.*/powerpc/ -e s/mips.*/mips/ ) -- --# Cross compiling and selecting different set of gcc/bin-utils --# --------------------------------------------------------------------------- --# --# When performing cross compilation for other architectures ARCH shall be set --# to the target architecture. (See arch/* for the possibilities). --# ARCH can be set during invocation of make: --# make ARCH=ia64 --# Another way is to have ARCH set in the environment. --# The default ARCH is the host where make is executed. -- --# CROSS_COMPILE specify the prefix used for all executables used --# during compilation. Only gcc and related bin-utils executables --# are prefixed with $(CROSS_COMPILE). --# CROSS_COMPILE can be set on the command line --# make CROSS_COMPILE=ia64-linux- --# Alternatively CROSS_COMPILE can be set in the environment. --# Default value for CROSS_COMPILE is not to prefix executables --# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile -- --ARCH ?= $(SUBARCH) --CROSS_COMPILE ?= -- --# Architecture as present in compile.h --UTS_MACHINE := $(ARCH) -- --KCONFIG_CONFIG ?= .config -- --# SHELL used by kbuild --CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ -- else if [ -x /bin/bash ]; then echo /bin/bash; \ -- else echo sh; fi ; fi) -- --HOSTCC = gcc --HOSTCXX = g++ --HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer --HOSTCXXFLAGS = -O2 -- --# Decide whether to build built-in, modular, or both. --# Normally, just do built-in. -- --KBUILD_MODULES := --KBUILD_BUILTIN := 1 -- --# If we have only "make modules", don't compile built-in objects. --# When we're building modules with modversions, we need to consider --# the built-in objects during the descend as well, in order to --# make sure the checksums are up to date before we record them. -- --ifeq ($(MAKECMDGOALS),modules) -- KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1) --endif -- --# If we have "make modules", compile modules --# in addition to whatever we do anyway. --# Just "make" or "make all" shall build modules as well -- --ifneq ($(filter all _all modules,$(MAKECMDGOALS)),) -- KBUILD_MODULES := 1 --endif -- --ifeq ($(MAKECMDGOALS),) -- KBUILD_MODULES := 1 --endif -- --export KBUILD_MODULES KBUILD_BUILTIN --export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD -- --# Beautify output --# --------------------------------------------------------------------------- --# --# Normally, we echo the whole command before executing it. By making --# that echo $($(quiet)$(cmd)), we now have the possibility to set --# $(quiet) to choose other forms of output instead, e.g. --# --# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@ --# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< --# --# If $(quiet) is empty, the whole command will be printed. --# If it is set to "quiet_", only the short version will be printed. --# If it is set to "silent_", nothing will be printed at all, since --# the variable $(silent_cmd_cc_o_c) doesn't exist. --# --# A simple variant is to prefix commands with $(Q) - that's useful --# for commands that shall be hidden in non-verbose mode. --# --# $(Q)ln $@ :< --# --# If KBUILD_VERBOSE equals 0 then the above command will be hidden. --# If KBUILD_VERBOSE equals 1 then the above command is displayed. -- --ifeq ($(KBUILD_VERBOSE),1) -- quiet = -- Q = --else -- quiet=quiet_ -- Q = @ --endif -- --# If the user is running make -s (silent mode), suppress echoing of --# commands -- --ifneq ($(findstring s,$(MAKEFLAGS)),) -- quiet=silent_ --endif -- --export quiet Q KBUILD_VERBOSE -- -- --# Look for make include files relative to root of kernel src --MAKEFLAGS += --include-dir=$(srctree) -- --# We need some generic definitions. --include $(srctree)/scripts/Kbuild.include -- --# Make variables (CC, etc...) -- --AS = $(CROSS_COMPILE)as --LD = $(CROSS_COMPILE)ld --CC = $(CROSS_COMPILE)gcc --CPP = $(CC) -E --AR = $(CROSS_COMPILE)ar --NM = $(CROSS_COMPILE)nm --STRIP = $(CROSS_COMPILE)strip --OBJCOPY = $(CROSS_COMPILE)objcopy --OBJDUMP = $(CROSS_COMPILE)objdump --AWK = awk --GENKSYMS = scripts/genksyms/genksyms --DEPMOD = /sbin/depmod --KALLSYMS = scripts/kallsyms --PERL = perl --CHECK = sparse -- --CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF) --MODFLAGS = -DMODULE --CFLAGS_MODULE = $(MODFLAGS) --AFLAGS_MODULE = $(MODFLAGS) --LDFLAGS_MODULE = -r --CFLAGS_KERNEL = --AFLAGS_KERNEL = -- -- --# Use LINUXINCLUDE when you must reference the include/ directory. --# Needed to be compatible with the O= option --LINUXINCLUDE := -Iinclude \ -- $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ -- -include include/linux/autoconf.h -- --CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) -- --CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -- -fno-strict-aliasing -fno-common --AFLAGS := -D__ASSEMBLY__ -- --# Read KERNELRELEASE from include/config/kernel.release (if it exists) --KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) --KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) -- --export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION --export ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC --export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE --export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS -- --export CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS --export CFLAGS CFLAGS_KERNEL CFLAGS_MODULE --export AFLAGS AFLAGS_KERNEL AFLAGS_MODULE -- --# When compiling out-of-tree modules, put MODVERDIR in the module --# tree rather than in the kernel tree. The kernel tree might --# even be read-only. --export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions -- --# Files to ignore in find ... statements -- --RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o --export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exclude CVS --exclude .pc --exclude .hg --exclude .git -- --# =========================================================================== --# Rules shared between *config targets and build targets -- --# Basic helpers built in scripts/ --PHONY += scripts_basic --scripts_basic: -- $(Q)$(MAKE) $(build)=scripts/basic -- --# To avoid any implicit rule to kick in, define an empty command. --scripts/basic/%: scripts_basic ; -- --PHONY += outputmakefile --# outputmakefile generates a Makefile in the output directory, if using a --# separate output directory. This allows convenient use of make in the --# output directory. --outputmakefile: --ifneq ($(KBUILD_SRC),) -- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile \ -- $(srctree) $(objtree) $(VERSION) $(PATCHLEVEL) --endif -- --# To make sure we do not include .config for any of the *config targets --# catch them early, and hand them over to scripts/kconfig/Makefile --# It is allowed to specify more targets when calling make, including --# mixing *config targets and build targets. --# For example 'make oldconfig all'. --# Detect when mixed targets is specified, and make a second invocation --# of make so .config is not included in this case either (for *config). -- --no-dot-config-targets := clean mrproper distclean \ -- cscope TAGS tags help %docs check% \ -- include/linux/version.h headers_% \ -- kernelrelease kernelversion -- --config-targets := 0 --mixed-targets := 0 --dot-config := 1 -- --ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) -- ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) -- dot-config := 0 -- endif --endif -- --ifeq ($(KBUILD_EXTMOD),) -- ifneq ($(filter config %config,$(MAKECMDGOALS)),) -- config-targets := 1 -- ifneq ($(filter-out config %config,$(MAKECMDGOALS)),) -- mixed-targets := 1 -- endif -- endif --endif -- --ifeq ($(mixed-targets),1) --# =========================================================================== --# We're called with mixed targets (*config and build targets). --# Handle them one by one. -- --%:: FORCE -- $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@ -- --else --ifeq ($(config-targets),1) --# =========================================================================== --# *config targets only - make sure prerequisites are updated, and descend --# in scripts/kconfig to make the *config target -- --# Read arch specific Makefile to set KBUILD_DEFCONFIG as needed. --# KBUILD_DEFCONFIG may point out an alternative default configuration --# used for 'make defconfig' --include $(srctree)/arch/$(ARCH)/Makefile --export KBUILD_DEFCONFIG -- --config %config: scripts_basic outputmakefile FORCE -- $(Q)mkdir -p include/linux include/config -- $(Q)$(MAKE) $(build)=scripts/kconfig $@ -- --else --# =========================================================================== --# Build targets only - this includes vmlinux, arch specific targets, clean --# targets and others. In general all targets except *config targets. -- --ifeq ($(KBUILD_EXTMOD),) --# Additional helpers built in scripts/ --# Carefully list dependencies so we do not try to build scripts twice --# in parallel --PHONY += scripts --scripts: scripts_basic include/config/auto.conf -- $(Q)$(MAKE) $(build)=$(@) -- --# Objects we will link into vmlinux / subdirs we need to visit --init-y := init/ --drivers-y := drivers/ sound/ --net-y := net/ --libs-y := lib/ --core-y := usr/ --endif # KBUILD_EXTMOD -- --ifeq ($(dot-config),1) --# Read in config ---include include/config/auto.conf -- --ifeq ($(KBUILD_EXTMOD),) --# Read in dependencies to all Kconfig* files, make sure to run --# oldconfig if changes are detected. ---include include/config/auto.conf.cmd -- --# To avoid any implicit rule to kick in, define an empty command --$(KCONFIG_CONFIG) include/config/auto.conf.cmd: ; -- --# If .config is newer than include/config/auto.conf, someone tinkered --# with it and forgot to run make oldconfig. --# if auto.conf.cmd is missing then we are probably in a cleaned tree so --# we execute the config step to be sure to catch updated Kconfig files --include/config/auto.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd -- $(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig --else --# external modules needs include/linux/autoconf.h and include/config/auto.conf --# but do not care if they are up-to-date. Use auto.conf to trigger the test --PHONY += include/config/auto.conf -- --include/config/auto.conf: -- $(Q)test -e include/linux/autoconf.h -a -e $@ || ( \ -- echo; \ -- echo " ERROR: Kernel configuration is invalid."; \ -- echo " include/linux/autoconf.h or $@ are missing."; \ -- echo " Run 'make oldconfig && make prepare' on kernel src to fix it."; \ -- echo; \ -- /bin/false) -- --endif # KBUILD_EXTMOD -- --else --# Dummy target needed, because used as prerequisite --include/config/auto.conf: ; --endif # $(dot-config) -- --# The all: target is the default when no target is given on the --# command line. --# This allow a user to issue only 'make' to build a kernel including modules --# Defaults vmlinux but it is usually overridden in the arch makefile --all: vmlinux -- --ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE --CFLAGS += -Os --else --CFLAGS += -O2 --endif -- --include $(srctree)/arch/$(ARCH)/Makefile -- --ifdef CONFIG_FRAME_POINTER --CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) --else --CFLAGS += -fomit-frame-pointer --endif -- --ifdef CONFIG_DEBUG_INFO --CFLAGS += -g --endif -- --# Force gcc to behave correct even for buggy distributions --CFLAGS += $(call cc-option, -fno-stack-protector) -- --# arch Makefile may override CC so keep this after arch Makefile is included --NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) --CHECKFLAGS += $(NOSTDINC_FLAGS) -- --# warn about C99 declaration after statement --CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) -- --# disable pointer signed / unsigned warnings in gcc 4.0 --CFLAGS += $(call cc-option,-Wno-pointer-sign,) -- --# Default kernel image to build when no specific target is given. --# KBUILD_IMAGE may be overruled on the command line or --# set in the environment --# Also any assignments in arch/$(ARCH)/Makefile take precedence over --# this default value --export KBUILD_IMAGE ?= vmlinux -- --# --# INSTALL_PATH specifies where to place the updated kernel and system map --# images. Default is /boot, but you can set it to other values --export INSTALL_PATH ?= /boot -- --# --# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory --# relocations required by build roots. This is not defined in the --# makefile but the argument can be passed to make if needed. --# -- --MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) --export MODLIB -- --# --# INSTALL_MOD_STRIP, if defined, will cause modules to be --# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then --# the default option --strip-debug will be used. Otherwise, --# INSTALL_MOD_STRIP will used as the options to the strip command. -- --ifdef INSTALL_MOD_STRIP --ifeq ($(INSTALL_MOD_STRIP),1) --mod_strip_cmd = $(STRIP) --strip-debug --else --mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP) --endif # INSTALL_MOD_STRIP=1 --else --mod_strip_cmd = true --endif # INSTALL_MOD_STRIP --export mod_strip_cmd -- -- --ifeq ($(KBUILD_EXTMOD),) --core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ -- --vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ -- $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ -- $(net-y) $(net-m) $(libs-y) $(libs-m))) -- --vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \ -- $(init-n) $(init-) \ -- $(core-n) $(core-) $(drivers-n) $(drivers-) \ -- $(net-n) $(net-) $(libs-n) $(libs-)))) -- --init-y := $(patsubst %/, %/built-in.o, $(init-y)) --core-y := $(patsubst %/, %/built-in.o, $(core-y)) --drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y)) --net-y := $(patsubst %/, %/built-in.o, $(net-y)) --libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) --libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y)) --libs-y := $(libs-y1) $(libs-y2) -- --# Build vmlinux --# --------------------------------------------------------------------------- --# vmlinux is built from the objects selected by $(vmlinux-init) and --# $(vmlinux-main). Most are built-in.o files from top-level directories --# in the kernel tree, others are specified in arch/$(ARCH)/Makefile. --# Ordering when linking is important, and $(vmlinux-init) must be first. --# --# vmlinux --# ^ --# | --# +-< $(vmlinux-init) --# | +--< init/version.o + more --# | --# +--< $(vmlinux-main) --# | +--< driver/built-in.o mm/built-in.o + more --# | --# +-< kallsyms.o (see description in CONFIG_KALLSYMS section) --# --# vmlinux version (uname -v) cannot be updated during normal --# descending-into-subdirs phase since we do not yet know if we need to --# update vmlinux. --# Therefore this step is delayed until just before final link of vmlinux - --# except in the kallsyms case where it is done just before adding the --# symbols to the kernel. --# --# System.map is generated to document addresses of all kernel symbols -- --vmlinux-init := $(head-y) $(init-y) --vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y) --vmlinux-all := $(vmlinux-init) $(vmlinux-main) --vmlinux-lds := arch/$(ARCH)/kernel/vmlinux.lds --export KBUILD_VMLINUX_OBJS := $(vmlinux-all) -- --# Rule to link vmlinux - also used during CONFIG_KALLSYMS --# May be overridden by arch/$(ARCH)/Makefile --quiet_cmd_vmlinux__ ?= LD $@ -- cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ -- -T $(vmlinux-lds) $(vmlinux-init) \ -- --start-group $(vmlinux-main) --end-group \ -- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) -- --# Generate new vmlinux version --quiet_cmd_vmlinux_version = GEN .version -- cmd_vmlinux_version = set -e; \ -- if [ ! -r .version ]; then \ -- rm -f .version; \ -- echo 1 >.version; \ -- else \ -- mv .version .old_version; \ -- expr 0$$(cat .old_version) + 1 >.version; \ -- fi; \ -- $(MAKE) $(build)=init -- --# Generate System.map --quiet_cmd_sysmap = SYSMAP -- cmd_sysmap = $(CONFIG_SHELL) $(srctree)/scripts/mksysmap -- --# Link of vmlinux --# If CONFIG_KALLSYMS is set .version is already updated --# Generate System.map and verify that the content is consistent --# Use + in front of the vmlinux_version rule to silent warning with make -j2 --# First command is ':' to allow us to use + in front of the rule --define rule_vmlinux__ -- : -- $(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version)) -- -- $(call cmd,vmlinux__) -- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd -- -- $(Q)$(if $($(quiet)cmd_sysmap), \ -- echo ' $($(quiet)cmd_sysmap) System.map' &&) \ -- $(cmd_sysmap) $@ System.map; \ -- if [ $$? -ne 0 ]; then \ -- rm -f $@; \ -- /bin/false; \ -- fi; -- $(verify_kallsyms) --endef -- -- --ifdef CONFIG_KALLSYMS --# Generate section listing all symbols and add it into vmlinux $(kallsyms.o) --# It's a three stage process: --# o .tmp_vmlinux1 has all symbols and sections, but __kallsyms is --# empty --# Running kallsyms on that gives us .tmp_kallsyms1.o with --# the right size - vmlinux version (uname -v) is updated during this step --# o .tmp_vmlinux2 now has a __kallsyms section of the right size, --# but due to the added section, some addresses have shifted. --# From here, we generate a correct .tmp_kallsyms2.o --# o The correct .tmp_kallsyms2.o is linked into the final vmlinux. --# o Verify that the System.map from vmlinux matches the map from --# .tmp_vmlinux2, just in case we did not generate kallsyms correctly. --# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using --# .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a --# temporary bypass to allow the kernel to be built while the --# maintainers work out what went wrong with kallsyms. -- --ifdef CONFIG_KALLSYMS_EXTRA_PASS --last_kallsyms := 3 --else --last_kallsyms := 2 --endif -- --kallsyms.o := .tmp_kallsyms$(last_kallsyms).o -- --define verify_kallsyms -- $(Q)$(if $($(quiet)cmd_sysmap), \ -- echo ' $($(quiet)cmd_sysmap) .tmp_System.map' &&) \ -- $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map -- $(Q)cmp -s System.map .tmp_System.map || \ -- (echo Inconsistent kallsyms data; \ -- echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \ -- rm .tmp_kallsyms* ; /bin/false ) --endef -- --# Update vmlinux version before link --# Use + in front of this rule to silent warning about make -j1 --# First command is ':' to allow us to use + in front of this rule --cmd_ksym_ld = $(cmd_vmlinux__) --define rule_ksym_ld -- : -- +$(call cmd,vmlinux_version) -- $(call cmd,vmlinux__) -- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd --endef -- --# Generate .S file with all kernel symbols --quiet_cmd_kallsyms = KSYM $@ -- cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \ -- $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@ -- --.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE -- $(call if_changed_dep,as_o_S) -- --.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS) -- $(call cmd,kallsyms) -- --# .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version --.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE -- $(call if_changed_rule,ksym_ld) -- --.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE -- $(call if_changed,vmlinux__) -- --.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE -- $(call if_changed,vmlinux__) -- --# Needs to visit scripts/ before $(KALLSYMS) can be used. --$(KALLSYMS): scripts ; -- --# Generate some data for debugging strange kallsyms problems --debug_kallsyms: .tmp_map$(last_kallsyms) -- --.tmp_map%: .tmp_vmlinux% FORCE -- ($(OBJDUMP) -h $< | $(AWK) '/^ +[0-9]/{print $$4 " 0 " $$2}'; $(NM) $<) | sort > $@ -- --.tmp_map3: .tmp_map2 -- --.tmp_map2: .tmp_map1 -- --endif # ifdef CONFIG_KALLSYMS -- --# vmlinux image - including updated kernel symbols --vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE --ifdef CONFIG_HEADERS_CHECK -- $(Q)$(MAKE) -f $(srctree)/Makefile headers_check --endif -- $(call if_changed_rule,vmlinux__) -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ -- $(Q)rm -f .old_version -- --# The actual objects are generated when descending, --# make sure no implicit rule kicks in --$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; -- --# Handle descending into subdirectories listed in $(vmlinux-dirs) --# Preset locale variables to speed up the build process. Limit locale --# tweaks to this spot to avoid wrong language settings when running --# make menuconfig etc. --# Error messages still appears in the original language -- --PHONY += $(vmlinux-dirs) --$(vmlinux-dirs): prepare scripts -- $(Q)$(MAKE) $(build)=$@ -- --# Build the kernel release string --# --# The KERNELRELEASE value built here is stored in the file --# include/config/kernel.release, and is used when executing several --# make targets, such as "make install" or "make modules_install." --# --# The eventual kernel release string consists of the following fields, --# shown in a hierarchical format to show how smaller parts are concatenated --# to form the larger and final value, with values coming from places like --# the Makefile, kernel config options, make command line options and/or --# SCM tag information. --# --# $(KERNELVERSION) --# $(VERSION) eg, 2 --# $(PATCHLEVEL) eg, 6 --# $(SUBLEVEL) eg, 18 --# $(EXTRAVERSION) eg, -rc6 --# $(localver-full) --# $(localver) --# localversion* (files without backups, containing '~') --# $(CONFIG_LOCALVERSION) (from kernel config setting) --# $(localver-auto) (only if CONFIG_LOCALVERSION_AUTO is set) --# ./scripts/setlocalversion (SCM tag, if one exists) --# $(LOCALVERSION) (from make command line if provided) --# --# Note how the final $(localver-auto) string is included *only* if the --# kernel config option CONFIG_LOCALVERSION_AUTO is selected. Also, at the --# moment, only git is supported but other SCMs can edit the script --# scripts/setlocalversion and add the appropriate checks as needed. -- --pattern = ".*/localversion[^~]*" --string = $(shell cat /dev/null \ -- `find $(objtree) $(srctree) -maxdepth 1 -regex $(pattern) | sort -u`) -- --localver = $(subst $(space),, $(string) \ -- $(patsubst "%",%,$(CONFIG_LOCALVERSION))) -- --# If CONFIG_LOCALVERSION_AUTO is set scripts/setlocalversion is called --# and if the SCM is know a tag from the SCM is appended. --# The appended tag is determined by the SCM used. --# --# Currently, only git is supported. --# Other SCMs can edit scripts/setlocalversion and add the appropriate --# checks as needed. --ifdef CONFIG_LOCALVERSION_AUTO -- _localver-auto = $(shell $(CONFIG_SHELL) \ -- $(srctree)/scripts/setlocalversion $(srctree)) -- localver-auto = $(LOCALVERSION)$(_localver-auto) --endif -- --localver-full = $(localver)$(localver-auto) -- --# Store (new) KERNELRELASE string in include/config/kernel.release --kernelrelease = $(KERNELVERSION)$(localver-full) --include/config/kernel.release: include/config/auto.conf FORCE -- $(Q)rm -f $@ -- $(Q)echo $(kernelrelease) > $@ -- -- --# Things we need to do before we recursively start building the kernel --# or the modules are listed in "prepare". --# A multi level approach is used. prepareN is processed before prepareN-1. --# archprepare is used in arch Makefiles and when processed asm symlink, --# version.h and scripts_basic is processed / created. -- --# Listed in dependency order --PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3 -- --# prepare3 is used to check if we are building in a separate output directory, --# and if so do: --# 1) Check that make has not been executed in the kernel src $(srctree) --# 2) Create the include2 directory, used for the second asm symlink --prepare3: include/config/kernel.release --ifneq ($(KBUILD_SRC),) -- @echo ' Using $(srctree) as source for kernel' -- $(Q)if [ -f $(srctree)/.config -o -d $(srctree)/include/config ]; then \ -- echo " $(srctree) is not clean, please run 'make mrproper'";\ -- echo " in the '$(srctree)' directory.";\ -- /bin/false; \ -- fi; -- $(Q)if [ ! -d include2 ]; then mkdir -p include2; fi; -- $(Q)ln -fsn $(srctree)/include/asm-$(ARCH) include2/asm --endif -- --# prepare2 creates a makefile if using a separate output directory --prepare2: prepare3 outputmakefile -- --prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \ -- include/asm include/config/auto.conf --ifneq ($(KBUILD_MODULES),) -- $(Q)mkdir -p $(MODVERDIR) -- $(Q)rm -f $(MODVERDIR)/* --endif -- --archprepare: prepare1 scripts_basic -- --prepare0: archprepare FORCE -- $(Q)$(MAKE) $(build)=. -- $(Q)$(MAKE) $(build)=. missing-syscalls -- --# All the preparing.. --prepare: prepare0 -- --# Leave this as default for preprocessing vmlinux.lds.S, which is now --# done in arch/$(ARCH)/kernel/Makefile -- --export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) -- --# FIXME: The asm symlink changes when $(ARCH) changes. That's --# hard to detect, but I suppose "make mrproper" is a good idea --# before switching between archs anyway. -- --include/asm: -- @echo ' SYMLINK $@ -> include/asm-$(ARCH)' -- $(Q)if [ ! -d include ]; then mkdir -p include; fi; -- @ln -fsn asm-$(ARCH) $@ -- --# Generate some files --# --------------------------------------------------------------------------- -- --# KERNELRELEASE can change from a few different places, meaning version.h --# needs to be updated, so this check is forced on all builds -- --uts_len := 64 --define filechk_utsrelease.h -- if [ `echo -n "$(KERNELRELEASE)" | wc -c ` -gt $(uts_len) ]; then \ -- echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2; \ -- exit 1; \ -- fi; \ -- (echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";) --endef -- --define filechk_version.h -- (echo \#define LINUX_VERSION_CODE $(shell \ -- expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \ -- echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';) --endef -- --include/linux/version.h: $(srctree)/Makefile FORCE -- $(call filechk,version.h) -- --include/linux/utsrelease.h: include/config/kernel.release FORCE -- $(call filechk,utsrelease.h) -- --# --------------------------------------------------------------------------- -- --PHONY += depend dep --depend dep: -- @echo '*** Warning: make $@ is unnecessary now.' -- --# --------------------------------------------------------------------------- --# Kernel headers --INSTALL_HDR_PATH=$(objtree)/usr --export INSTALL_HDR_PATH -- --HDRARCHES=$(filter-out generic,$(patsubst $(srctree)/include/asm-%/Kbuild,%,$(wildcard $(srctree)/include/asm-*/Kbuild))) -- --PHONY += headers_install_all --headers_install_all: include/linux/version.h scripts_basic FORCE -- $(Q)$(MAKE) $(build)=scripts scripts/unifdef -- $(Q)for arch in $(HDRARCHES); do \ -- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch ;\ -- done -- --PHONY += headers_install --headers_install: include/linux/version.h scripts_basic FORCE -- @if [ ! -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ -- echo '*** Error: Headers not exportable for this architecture ($(ARCH))'; \ -- exit 1 ; fi -- $(Q)$(MAKE) $(build)=scripts scripts/unifdef -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include -- --PHONY += headers_check_all --headers_check_all: headers_install_all -- $(Q)for arch in $(HDRARCHES); do \ -- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch HDRCHECK=1 ;\ -- done -- --PHONY += headers_check --headers_check: headers_install -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include HDRCHECK=1 -- --# --------------------------------------------------------------------------- --# Modules -- --ifdef CONFIG_MODULES -- --# By default, build modules as well -- --all: modules -- --# Build modules -- --PHONY += modules --modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) -- @echo ' Building modules, stage 2.'; -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost -- -- --# Target to prepare building external modules --PHONY += modules_prepare --modules_prepare: prepare scripts -- --# Target to install modules --PHONY += modules_install --modules_install: _modinst_ _modinst_post -- --PHONY += _modinst_ --_modinst_: -- @if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \ -- echo "Warning: you may need to install module-init-tools"; \ -- echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt";\ -- sleep 1; \ -- fi -- @rm -rf $(MODLIB)/kernel -- @rm -f $(MODLIB)/source -- @mkdir -p $(MODLIB)/kernel -- @ln -s $(srctree) $(MODLIB)/source -- @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \ -- rm -f $(MODLIB)/build ; \ -- ln -s $(objtree) $(MODLIB)/build ; \ -- fi -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst -- --# If System.map exists, run depmod. This deliberately does not have a --# dependency on System.map since that would run the dependency tree on --# vmlinux. This depmod is only for convenience to give the initial --# boot a modules.dep even before / is mounted read-write. However the --# boot script depmod is the master version. --ifeq "$(strip $(INSTALL_MOD_PATH))" "" --depmod_opts := --else --depmod_opts := -b $(INSTALL_MOD_PATH) -r --endif --PHONY += _modinst_post --_modinst_post: _modinst_ -- if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi -- --else # CONFIG_MODULES -- --# Modules not configured --# --------------------------------------------------------------------------- -- --modules modules_install: FORCE -- @echo -- @echo "The present kernel configuration has modules disabled." -- @echo "Type 'make config' and enable loadable module support." -- @echo "Then build a kernel with module support enabled." -- @echo -- @exit 1 -- --endif # CONFIG_MODULES -- --### --# Cleaning is done on three levels. --# make clean Delete most generated files --# Leave enough to build external modules --# make mrproper Delete the current configuration, and all generated files --# make distclean Remove editor backup files, patch leftover files and the like -- --# Directories & files removed with 'make clean' --CLEAN_DIRS += $(MODVERDIR) --CLEAN_FILES += vmlinux System.map \ -- .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map -- --# Directories & files removed with 'make mrproper' --MRPROPER_DIRS += include/config include2 usr/include --MRPROPER_FILES += .config .config.old include/asm .version .old_version \ -- include/linux/autoconf.h include/linux/version.h \ -- include/linux/utsrelease.h \ -- Module.symvers tags TAGS cscope* -- --# clean - Delete most, but leave enough to build external modules --# --clean: rm-dirs := $(CLEAN_DIRS) --clean: rm-files := $(CLEAN_FILES) --clean-dirs := $(addprefix _clean_,$(srctree) $(vmlinux-alldirs)) -- --PHONY += $(clean-dirs) clean archclean --$(clean-dirs): -- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@) -- --clean: archclean $(clean-dirs) -- $(call cmd,rmdirs) -- $(call cmd,rmfiles) -- @find . $(RCS_FIND_IGNORE) \ -- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ -- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ -- -o -name '*.symtypes' \) \ -- -type f -print | xargs rm -f -- --# mrproper - Delete all generated files, including .config --# --mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS)) --mrproper: rm-files := $(wildcard $(MRPROPER_FILES)) --mrproper-dirs := $(addprefix _mrproper_,Documentation/DocBook scripts) -- --PHONY += $(mrproper-dirs) mrproper archmrproper --$(mrproper-dirs): -- $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@) -- --mrproper: clean archmrproper $(mrproper-dirs) -- $(call cmd,rmdirs) -- $(call cmd,rmfiles) -- --# distclean --# --PHONY += distclean -- --distclean: mrproper -- @find $(srctree) $(RCS_FIND_IGNORE) \ -- \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ -- -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ -- -o -name '.*.rej' -o -size 0 \ -- -o -name '*%' -o -name '.*.cmd' -o -name 'core' \) \ -- -type f -print | xargs rm -f -- -- --# Packaging of the kernel to various formats --# --------------------------------------------------------------------------- --# rpm target kept for backward compatibility --package-dir := $(srctree)/scripts/package -- --%pkg: include/config/kernel.release FORCE -- $(Q)$(MAKE) $(build)=$(package-dir) $@ --rpm: include/config/kernel.release FORCE -- $(Q)$(MAKE) $(build)=$(package-dir) $@ -- -- --# Brief documentation of the typical targets used --# --------------------------------------------------------------------------- -- --boards := $(wildcard $(srctree)/arch/$(ARCH)/configs/*_defconfig) --boards := $(notdir $(boards)) -- --help: -- @echo 'Cleaning targets:' -- @echo ' clean - Remove most generated files but keep the config and' -- @echo ' enough build support to build external modules' -- @echo ' mrproper - Remove all generated files + config + various backup files' -- @echo ' distclean - mrproper + remove editor backup and patch files' -- @echo '' -- @echo 'Configuration targets:' -- @$(MAKE) -f $(srctree)/scripts/kconfig/Makefile help -- @echo '' -- @echo 'Other generic targets:' -- @echo ' all - Build all targets marked with [*]' -- @echo '* vmlinux - Build the bare kernel' -- @echo '* modules - Build all modules' -- @echo ' modules_install - Install all modules to INSTALL_MOD_PATH (default: /)' -- @echo ' dir/ - Build all files in dir and below' -- @echo ' dir/file.[ois] - Build specified target only' -- @echo ' dir/file.ko - Build module including final link' -- @echo ' rpm - Build a kernel as an RPM package' -- @echo ' tags/TAGS - Generate tags file for editors' -- @echo ' cscope - Generate cscope index' -- @echo ' kernelrelease - Output the release version string' -- @echo ' kernelversion - Output the version stored in Makefile' -- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ -- echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \ -- echo ' (default: $(INSTALL_HDR_PATH))'; \ -- fi -- @echo '' -- @echo 'Static analysers' -- @echo ' checkstack - Generate a list of stack hogs' -- @echo ' namespacecheck - Name space analysis on compiled kernel' -- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ -- echo ' headers_check - Sanity check on exported headers'; \ -- fi -- @echo '' -- @echo 'Kernel packaging:' -- @$(MAKE) $(build)=$(package-dir) help -- @echo '' -- @echo 'Documentation targets:' -- @$(MAKE) -f $(srctree)/Documentation/DocBook/Makefile dochelp -- @echo '' -- @echo 'Architecture specific targets ($(ARCH)):' -- @$(if $(archhelp),$(archhelp),\ -- echo ' No architecture specific help defined for $(ARCH)') -- @echo '' -- @$(if $(boards), \ -- $(foreach b, $(boards), \ -- printf " %-24s - Build for %s\\n" $(b) $(subst _defconfig,,$(b));) \ -- echo '') -- -- @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build' -- @echo ' make V=2 [targets] 2 => give reason for rebuild of target' -- @echo ' make O=dir [targets] Locate all output files in "dir", including .config' -- @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' -- @echo ' make C=2 [targets] Force check of all c source with $$CHECK' -- @echo '' -- @echo 'Execute "make" or "make all" to build all targets marked with [*] ' -- @echo 'For further info see the ./README file' -- -- --# Documentation targets --# --------------------------------------------------------------------------- --%docs: scripts_basic FORCE -- $(Q)$(MAKE) $(build)=Documentation/DocBook $@ -- --else # KBUILD_EXTMOD -- --### --# External module support. --# When building external modules the kernel used as basis is considered --# read-only, and no consistency checks are made and the make --# system is not used on the basis kernel. If updates are required --# in the basis kernel ordinary make commands (without M=...) must --# be used. --# --# The following are the only valid targets when building external --# modules. --# make M=dir clean Delete all automatically generated files --# make M=dir modules Make all modules in specified dir --# make M=dir Same as 'make M=dir modules' --# make M=dir modules_install --# Install the modules built in the module directory --# Assumes install directory is already created -- --# We are always building modules --KBUILD_MODULES := 1 --PHONY += crmodverdir --crmodverdir: -- $(Q)mkdir -p $(MODVERDIR) -- $(Q)rm -f $(MODVERDIR)/* -- --PHONY += $(objtree)/Module.symvers --$(objtree)/Module.symvers: -- @test -e $(objtree)/Module.symvers || ( \ -- echo; \ -- echo " WARNING: Symbol version dump $(objtree)/Module.symvers"; \ -- echo " is missing; modules will have no dependencies and modversions."; \ -- echo ) -- --module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD)) --PHONY += $(module-dirs) modules --$(module-dirs): crmodverdir $(objtree)/Module.symvers -- $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) -- --modules: $(module-dirs) -- @echo ' Building modules, stage 2.'; -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost -- --PHONY += modules_install --modules_install: _emodinst_ _emodinst_post -- --install-dir := $(if $(INSTALL_MOD_DIR),$(INSTALL_MOD_DIR),extra) --PHONY += _emodinst_ --_emodinst_: -- $(Q)mkdir -p $(MODLIB)/$(install-dir) -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst -- --# Run depmod only is we have System.map and depmod is executable --quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) -- cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \ -- $(DEPMOD) -ae -F System.map \ -- $(if $(strip $(INSTALL_MOD_PATH)), \ -- -b $(INSTALL_MOD_PATH) -r) \ -- $(KERNELRELEASE); \ -- fi -- --PHONY += _emodinst_post --_emodinst_post: _emodinst_ -- $(call cmd,depmod) -- --clean-dirs := $(addprefix _clean_,$(KBUILD_EXTMOD)) -- --PHONY += $(clean-dirs) clean --$(clean-dirs): -- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@) -- --clean: rm-dirs := $(MODVERDIR) --clean: $(clean-dirs) -- $(call cmd,rmdirs) -- @find $(KBUILD_EXTMOD) $(RCS_FIND_IGNORE) \ -- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ -- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \) \ -- -type f -print | xargs rm -f -- --help: -- @echo ' Building external modules.' -- @echo ' Syntax: make -C path/to/kernel/src M=$$PWD target' -- @echo '' -- @echo ' modules - default target, build the module(s)' -- @echo ' modules_install - install the module' -- @echo ' clean - remove generated files in module directory only' -- @echo '' -- --# Dummies... --PHONY += prepare scripts --prepare: ; --scripts: ; --endif # KBUILD_EXTMOD -- --# Generate tags for editors --# --------------------------------------------------------------------------- -- --#We want __srctree to totally vanish out when KBUILD_OUTPUT is not set --#(which is the most common case IMHO) to avoid unneeded clutter in the big tags file. --#Adding $(srctree) adds about 20M on i386 to the size of the output file! -- --ifeq ($(src),$(obj)) --__srctree = --else --__srctree = $(srctree)/ --endif -- --ifeq ($(ALLSOURCE_ARCHS),) --ifeq ($(ARCH),um) --ALLINCLUDE_ARCHS := $(ARCH) $(SUBARCH) --else --ALLINCLUDE_ARCHS := $(ARCH) --endif --else --#Allow user to specify only ALLSOURCE_PATHS on the command line, keeping existing behavour. --ALLINCLUDE_ARCHS := $(ALLSOURCE_ARCHS) --endif -- --ALLSOURCE_ARCHS := $(ARCH) -- --define find-sources -- ( for ARCH in $(ALLSOURCE_ARCHS) ; do \ -- find $(__srctree)arch/$${ARCH} $(RCS_FIND_IGNORE) \ -- -name $1 -print; \ -- done ; \ -- find $(__srctree)security/selinux/include $(RCS_FIND_IGNORE) \ -- -name $1 -print; \ -- find $(__srctree)include $(RCS_FIND_IGNORE) \ -- \( -name config -o -name 'asm-*' \) -prune \ -- -o -name $1 -print; \ -- for ARCH in $(ALLINCLUDE_ARCHS) ; do \ -- find $(__srctree)include/asm-$${ARCH} $(RCS_FIND_IGNORE) \ -- -name $1 -print; \ -- done ; \ -- find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \ -- -name $1 -print; \ -- find $(__srctree) $(RCS_FIND_IGNORE) \ -- \( -name include -o -name arch \) -prune -o \ -- -name $1 -print; \ -- ) --endef -- --define all-sources -- $(call find-sources,'*.[chS]') --endef --define all-kconfigs -- $(call find-sources,'Kconfig*') --endef --define all-defconfigs -- $(call find-sources,'defconfig') --endef -- --define xtags -- if $1 --version 2>&1 | grep -iq exuberant; then \ -- $(all-sources) | xargs $1 -a \ -- -I __initdata,__exitdata,__acquires,__releases \ -- -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \ -- --extra=+f --c-kinds=+px \ -- --regex-asm='/ENTRY\(([^)]*)\).*/\1/'; \ -- $(all-kconfigs) | xargs $1 -a \ -- --langdef=kconfig \ -- --language-force=kconfig \ -- --regex-kconfig='/^[[:blank:]]*config[[:blank:]]+([[:alnum:]_]+)/\1/'; \ -- $(all-defconfigs) | xargs -r $1 -a \ -- --langdef=dotconfig \ -- --language-force=dotconfig \ -- --regex-dotconfig='/^#?[[:blank:]]*(CONFIG_[[:alnum:]_]+)/\1/'; \ -- elif $1 --version 2>&1 | grep -iq emacs; then \ -- $(all-sources) | xargs $1 -a; \ -- $(all-kconfigs) | xargs $1 -a \ -- --regex='/^[ \t]*config[ \t]+\([a-zA-Z0-9_]+\)/\1/'; \ -- $(all-defconfigs) | xargs -r $1 -a \ -- --regex='/^#?[ \t]?\(CONFIG_[a-zA-Z0-9_]+\)/\1/'; \ -- else \ -- $(all-sources) | xargs $1 -a; \ -- fi --endef -- --quiet_cmd_cscope-file = FILELST cscope.files -- cmd_cscope-file = (echo \-k; echo \-q; $(all-sources)) > cscope.files -- --quiet_cmd_cscope = MAKE cscope.out -- cmd_cscope = cscope -b -- --cscope: FORCE -- $(call cmd,cscope-file) -- $(call cmd,cscope) -- --quiet_cmd_TAGS = MAKE $@ --define cmd_TAGS -- rm -f $@; \ -- $(call xtags,etags) --endef -- --TAGS: FORCE -- $(call cmd,TAGS) -- --quiet_cmd_tags = MAKE $@ --define cmd_tags -- rm -f $@; \ -- $(call xtags,ctags) --endef -- --tags: FORCE -- $(call cmd,tags) -- -- --# Scripts to check various things for consistency --# --------------------------------------------------------------------------- -- --includecheck: -- find * $(RCS_FIND_IGNORE) \ -- -name '*.[hcS]' -type f -print | sort \ -- | xargs $(PERL) -w scripts/checkincludes.pl -- --versioncheck: -- find * $(RCS_FIND_IGNORE) \ -- -name '*.[hcS]' -type f -print | sort \ -- | xargs $(PERL) -w scripts/checkversion.pl -- --namespacecheck: -- $(PERL) $(srctree)/scripts/namespace.pl -- --endif #ifeq ($(config-targets),1) --endif #ifeq ($(mixed-targets),1) -- --PHONY += checkstack kernelrelease kernelversion -- --# UML needs a little special treatment here. It wants to use the host --# toolchain, so needs $(SUBARCH) passed to checkstack.pl. Everyone --# else wants $(ARCH), including people doing cross-builds, which means --# that $(SUBARCH) doesn't work here. --ifeq ($(ARCH), um) --CHECKSTACK_ARCH := $(SUBARCH) --else --CHECKSTACK_ARCH := $(ARCH) --endif --checkstack: -- $(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \ -- $(PERL) $(src)/scripts/checkstack.pl $(CHECKSTACK_ARCH) -- --kernelrelease: -- $(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \ -- $(error kernelrelease not valid - run 'make prepare' to update it)) --kernelversion: -- @echo $(KERNELVERSION) -- --# Single targets --# --------------------------------------------------------------------------- --# Single targets are compatible with: --# - build whith mixed source and output --# - build with separate output dir 'make O=...' --# - external modules --# --# target-dir => where to store outputfile --# build-dir => directory in kernel source tree to use -- --ifeq ($(KBUILD_EXTMOD),) -- build-dir = $(patsubst %/,%,$(dir $@)) -- target-dir = $(dir $@) --else -- zap-slash=$(filter-out .,$(patsubst %/,%,$(dir $@))) -- build-dir = $(KBUILD_EXTMOD)$(if $(zap-slash),/$(zap-slash)) -- target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@)) --endif -- --%.s: %.c prepare scripts FORCE -- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) --%.i: %.c prepare scripts FORCE -- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) --%.o: %.c prepare scripts FORCE -- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) --%.lst: %.c prepare scripts FORCE -- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) --%.s: %.S prepare scripts FORCE -- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) --%.o: %.S prepare scripts FORCE -- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) --%.symtypes: %.c prepare scripts FORCE -- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -- --# Modules --/ %/: prepare scripts FORCE -- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ -- $(build)=$(build-dir) --%.ko: prepare scripts FORCE -- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ -- $(build)=$(build-dir) $(@:.ko=.o) -- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost -- --# FIXME Should go into a make.lib or something --# =========================================================================== -- --quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs))) -- cmd_rmdirs = rm -rf $(rm-dirs) -- --quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files))) -- cmd_rmfiles = rm -f $(rm-files) -- -- --a_flags = -Wp,-MD,$(depfile) $(AFLAGS) $(AFLAGS_KERNEL) \ -- $(NOSTDINC_FLAGS) $(CPPFLAGS) \ -- $(modkern_aflags) $(EXTRA_AFLAGS) $(AFLAGS_$(basetarget).o) -- --quiet_cmd_as_o_S = AS $@ --cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< -- --# read all saved command lines -- --targets := $(wildcard $(sort $(targets))) --cmd_files := $(wildcard .*.cmd $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) -- --ifneq ($(cmd_files),) -- $(cmd_files): ; # Do not try to update included dependency files -- include $(cmd_files) --endif -- --# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir --# Usage: --# $(Q)$(MAKE) $(clean)=dir --clean := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.clean obj -- --endif # skip-makefile -- --PHONY += FORCE --FORCE: -- --# Cancel implicit rules on top Makefile, `-rR' will apply to sub-makes. --Makefile: ; -- --# Declare the contents of the .PHONY variable as phony. We keep that --# information in a variable se we can use it in if_changed and friends. --.PHONY: $(PHONY) diff -Nurb linux-2.6.22-570/arch/arm/Kconfig linux-2.6.22-590/arch/arm/Kconfig --- linux-2.6.22-570/arch/arm/Kconfig 2008-03-20 13:25:43.000000000 -0400 +++ linux-2.6.22-590/arch/arm/Kconfig 2008-03-20 13:27:59.000000000 -0400 @@ -147930,154 +146433,6 @@ diff -Nurb linux-2.6.22-570/include/linux/vmalloc.h linux-2.6.22-590/include/lin /* * Internals. Dont't use.. -diff -Nurb linux-2.6.22-570/include/linux/vserver/network.h.orig.orig linux-2.6.22-590/include/linux/vserver/network.h.orig.orig ---- linux-2.6.22-570/include/linux/vserver/network.h.orig.orig 2008-03-20 13:25:49.000000000 -0400 -+++ linux-2.6.22-590/include/linux/vserver/network.h.orig.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,143 +0,0 @@ --#ifndef _VX_NETWORK_H --#define _VX_NETWORK_H -- --#include -- -- --#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ -- -- --/* network flags */ -- --#define NXF_INFO_PRIVATE 0x00000008 -- --#define NXF_SINGLE_IP 0x00000100 --#define NXF_LBACK_REMAP 0x00000200 -- --#define NXF_HIDE_NETIF 0x02000000 --#define NXF_HIDE_LBACK 0x04000000 -- --#define NXF_STATE_SETUP (1ULL << 32) --#define NXF_STATE_ADMIN (1ULL << 34) -- --#define NXF_SC_HELPER (1ULL << 36) --#define NXF_PERSISTENT (1ULL << 38) -- --#define NXF_ONE_TIME (0x0005ULL << 32) -- -- --#define NXF_INIT_SET (__nxf_init_set()) -- --static inline uint64_t __nxf_init_set(void) { -- return NXF_STATE_ADMIN --#ifdef CONFIG_VSERVER_AUTO_LBACK -- | NXF_LBACK_REMAP -- | NXF_HIDE_LBACK --#endif --#ifdef CONFIG_VSERVER_AUTO_SINGLE -- | NXF_SINGLE_IP --#endif -- | NXF_HIDE_NETIF; --} -- -- --/* network caps */ -- --#define NXC_RAW_ICMP 0x00000100 -- -- --/* address types */ -- --#define NXA_TYPE_IPV4 0x0001 --#define NXA_TYPE_IPV6 0x0002 -- --#define NXA_TYPE_NONE 0x0000 --#define NXA_TYPE_ANY 0x00FF -- --#define NXA_TYPE_ADDR 0x0010 --#define NXA_TYPE_MASK 0x0020 --#define NXA_TYPE_RANGE 0x0040 -- --#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE) -- --#define NXA_MOD_BCAST 0x0100 --#define NXA_MOD_LBACK 0x0200 -- --#define NXA_LOOPBACK 0x1000 -- --#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK) --#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK) -- --#ifdef __KERNEL__ -- --#include --#include --#include --#include --#include --#include -- --struct nx_addr_v4 { -- struct nx_addr_v4 *next; -- struct in_addr ip[2]; -- struct in_addr mask; -- uint16_t type; -- uint16_t flags; --}; -- --struct nx_addr_v6 { -- struct nx_addr_v6 *next; -- struct in6_addr ip; -- struct in6_addr mask; -- uint32_t prefix; -- uint16_t type; -- uint16_t flags; --}; -- --struct nx_info { -- struct hlist_node nx_hlist; /* linked list of nxinfos */ -- nid_t nx_id; /* vnet id */ -- atomic_t nx_usecnt; /* usage count */ -- atomic_t nx_tasks; /* tasks count */ -- int nx_state; /* context state */ -- -- uint64_t nx_flags; /* network flag word */ -- uint64_t nx_ncaps; /* network capabilities */ -- -- struct in_addr v4_lback; /* Loopback address */ -- struct in_addr v4_bcast; /* Broadcast address */ -- struct nx_addr_v4 v4; /* First/Single ipv4 address */ --#ifdef CONFIG_IPV6 -- struct nx_addr_v6 v6; /* First/Single ipv6 address */ --#endif -- char nx_name[65]; /* network context name */ --}; -- -- --/* status flags */ -- --#define NXS_HASHED 0x0001 --#define NXS_SHUTDOWN 0x0100 --#define NXS_RELEASED 0x8000 -- --extern struct nx_info *lookup_nx_info(int); -- --extern int get_nid_list(int, unsigned int *, int); --extern int nid_is_hashed(nid_t); -- --extern int nx_migrate_task(struct task_struct *, struct nx_info *); -- --extern long vs_net_change(struct nx_info *, unsigned int); -- --struct sock; -- -- --#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE) --#ifdef CONFIG_IPV6 --#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE) --#else --#define NX_IPV6(n) (0) --#endif -- --#endif /* __KERNEL__ */ --#endif /* _VX_NETWORK_H */ -diff -Nurb linux-2.6.22-570/include/net/addrconf.h linux-2.6.22-590/include/net/addrconf.h --- linux-2.6.22-570/include/net/addrconf.h 2008-03-20 13:25:45.000000000 -0400 +++ linux-2.6.22-590/include/net/addrconf.h 2008-03-20 13:28:02.000000000 -0400 @@ -61,7 +61,7 @@ @@ -163732,2629 +162087,6 @@ diff -Nurb linux-2.6.22-570/mm/shmem.c linux-2.6.22-590/mm/shmem.c + vma->vm_flags |= VM_CAN_INVALIDATE; return 0; } -diff -Nurb linux-2.6.22-570/mm/shmem.c.orig linux-2.6.22-590/mm/shmem.c.orig ---- linux-2.6.22-570/mm/shmem.c.orig 2008-03-20 13:25:40.000000000 -0400 -+++ linux-2.6.22-590/mm/shmem.c.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,2619 +0,0 @@ --/* -- * Resizable virtual memory filesystem for Linux. -- * -- * Copyright (C) 2000 Linus Torvalds. -- * 2000 Transmeta Corp. -- * 2000-2001 Christoph Rohland -- * 2000-2001 SAP AG -- * 2002 Red Hat Inc. -- * Copyright (C) 2002-2005 Hugh Dickins. -- * Copyright (C) 2002-2005 VERITAS Software Corporation. -- * Copyright (C) 2004 Andi Kleen, SuSE Labs -- * -- * Extended attribute support for tmpfs: -- * Copyright (c) 2004, Luke Kenneth Casson Leighton -- * Copyright (c) 2004 Red Hat, Inc., James Morris -- * -- * This file is released under the GPL. -- */ -- --/* -- * This virtual memory filesystem is heavily based on the ramfs. It -- * extends ramfs by the ability to use swap and honor resource limits -- * which makes it a completely usable filesystem. -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include -- --/* This magic number is used in glibc for posix shared memory */ --#define TMPFS_MAGIC 0x01021994 -- --#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) --#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) --#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) -- --#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) --#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) -- --#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) -- --/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ --#define SHMEM_PAGEIN VM_READ --#define SHMEM_TRUNCATE VM_WRITE -- --/* Definition to limit shmem_truncate's steps between cond_rescheds */ --#define LATENCY_LIMIT 64 -- --/* Pretend that each entry is of this size in directory's i_size */ --#define BOGO_DIRENT_SIZE 20 -- --/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ --enum sgp_type { -- SGP_QUICK, /* don't try more than file page cache lookup */ -- SGP_READ, /* don't exceed i_size, don't allocate page */ -- SGP_CACHE, /* don't exceed i_size, may allocate page */ -- SGP_WRITE, /* may exceed i_size, may allocate page */ --}; -- --static int shmem_getpage(struct inode *inode, unsigned long idx, -- struct page **pagep, enum sgp_type sgp, int *type); -- --static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) --{ -- /* -- * The above definition of ENTRIES_PER_PAGE, and the use of -- * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: -- * might be reconsidered if it ever diverges from PAGE_SIZE. -- */ -- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT); --} -- --static inline void shmem_dir_free(struct page *page) --{ -- __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); --} -- --static struct page **shmem_dir_map(struct page *page) --{ -- return (struct page **)kmap_atomic(page, KM_USER0); --} -- --static inline void shmem_dir_unmap(struct page **dir) --{ -- kunmap_atomic(dir, KM_USER0); --} -- --static swp_entry_t *shmem_swp_map(struct page *page) --{ -- return (swp_entry_t *)kmap_atomic(page, KM_USER1); --} -- --static inline void shmem_swp_balance_unmap(void) --{ -- /* -- * When passing a pointer to an i_direct entry, to code which -- * also handles indirect entries and so will shmem_swp_unmap, -- * we must arrange for the preempt count to remain in balance. -- * What kmap_atomic of a lowmem page does depends on config -- * and architecture, so pretend to kmap_atomic some lowmem page. -- */ -- (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); --} -- --static inline void shmem_swp_unmap(swp_entry_t *entry) --{ -- kunmap_atomic(entry, KM_USER1); --} -- --static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) --{ -- return sb->s_fs_info; --} -- --/* -- * shmem_file_setup pre-accounts the whole fixed size of a VM object, -- * for shared memory and for shared anonymous (/dev/zero) mappings -- * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), -- * consistent with the pre-accounting of private mappings ... -- */ --static inline int shmem_acct_size(unsigned long flags, loff_t size) --{ -- return (flags & VM_ACCOUNT)? -- security_vm_enough_memory(VM_ACCT(size)): 0; --} -- --static inline void shmem_unacct_size(unsigned long flags, loff_t size) --{ -- if (flags & VM_ACCOUNT) -- vm_unacct_memory(VM_ACCT(size)); --} -- --/* -- * ... whereas tmpfs objects are accounted incrementally as -- * pages are allocated, in order to allow huge sparse files. -- * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM, -- * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM. -- */ --static inline int shmem_acct_block(unsigned long flags) --{ -- return (flags & VM_ACCOUNT)? -- 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE)); --} -- --static inline void shmem_unacct_blocks(unsigned long flags, long pages) --{ -- if (!(flags & VM_ACCOUNT)) -- vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); --} -- --static const struct super_operations shmem_ops; --static const struct address_space_operations shmem_aops; --static const struct file_operations shmem_file_operations; --static const struct inode_operations shmem_inode_operations; --static const struct inode_operations shmem_dir_inode_operations; --static const struct inode_operations shmem_special_inode_operations; --static struct vm_operations_struct shmem_vm_ops; -- --static struct backing_dev_info shmem_backing_dev_info __read_mostly = { -- .ra_pages = 0, /* No readahead */ -- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, -- .unplug_io_fn = default_unplug_io_fn, --}; -- --static LIST_HEAD(shmem_swaplist); --static DEFINE_SPINLOCK(shmem_swaplist_lock); -- --static void shmem_free_blocks(struct inode *inode, long pages) --{ -- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); -- if (sbinfo->max_blocks) { -- spin_lock(&sbinfo->stat_lock); -- sbinfo->free_blocks += pages; -- inode->i_blocks -= pages*BLOCKS_PER_PAGE; -- spin_unlock(&sbinfo->stat_lock); -- } --} -- --/* -- * shmem_recalc_inode - recalculate the size of an inode -- * -- * @inode: inode to recalc -- * -- * We have to calculate the free blocks since the mm can drop -- * undirtied hole pages behind our back. -- * -- * But normally info->alloced == inode->i_mapping->nrpages + info->swapped -- * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) -- * -- * It has to be called with the spinlock held. -- */ --static void shmem_recalc_inode(struct inode *inode) --{ -- struct shmem_inode_info *info = SHMEM_I(inode); -- long freed; -- -- freed = info->alloced - info->swapped - inode->i_mapping->nrpages; -- if (freed > 0) { -- info->alloced -= freed; -- shmem_unacct_blocks(info->flags, freed); -- shmem_free_blocks(inode, freed); -- } --} -- --/* -- * shmem_swp_entry - find the swap vector position in the info structure -- * -- * @info: info structure for the inode -- * @index: index of the page to find -- * @page: optional page to add to the structure. Has to be preset to -- * all zeros -- * -- * If there is no space allocated yet it will return NULL when -- * page is NULL, else it will use the page for the needed block, -- * setting it to NULL on return to indicate that it has been used. -- * -- * The swap vector is organized the following way: -- * -- * There are SHMEM_NR_DIRECT entries directly stored in the -- * shmem_inode_info structure. So small files do not need an addional -- * allocation. -- * -- * For pages with index > SHMEM_NR_DIRECT there is the pointer -- * i_indirect which points to a page which holds in the first half -- * doubly indirect blocks, in the second half triple indirect blocks: -- * -- * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the -- * following layout (for SHMEM_NR_DIRECT == 16): -- * -- * i_indirect -> dir --> 16-19 -- * | +-> 20-23 -- * | -- * +-->dir2 --> 24-27 -- * | +-> 28-31 -- * | +-> 32-35 -- * | +-> 36-39 -- * | -- * +-->dir3 --> 40-43 -- * +-> 44-47 -- * +-> 48-51 -- * +-> 52-55 -- */ --static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) --{ -- unsigned long offset; -- struct page **dir; -- struct page *subdir; -- -- if (index < SHMEM_NR_DIRECT) { -- shmem_swp_balance_unmap(); -- return info->i_direct+index; -- } -- if (!info->i_indirect) { -- if (page) { -- info->i_indirect = *page; -- *page = NULL; -- } -- return NULL; /* need another page */ -- } -- -- index -= SHMEM_NR_DIRECT; -- offset = index % ENTRIES_PER_PAGE; -- index /= ENTRIES_PER_PAGE; -- dir = shmem_dir_map(info->i_indirect); -- -- if (index >= ENTRIES_PER_PAGE/2) { -- index -= ENTRIES_PER_PAGE/2; -- dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; -- index %= ENTRIES_PER_PAGE; -- subdir = *dir; -- if (!subdir) { -- if (page) { -- *dir = *page; -- *page = NULL; -- } -- shmem_dir_unmap(dir); -- return NULL; /* need another page */ -- } -- shmem_dir_unmap(dir); -- dir = shmem_dir_map(subdir); -- } -- -- dir += index; -- subdir = *dir; -- if (!subdir) { -- if (!page || !(subdir = *page)) { -- shmem_dir_unmap(dir); -- return NULL; /* need a page */ -- } -- *dir = subdir; -- *page = NULL; -- } -- shmem_dir_unmap(dir); -- return shmem_swp_map(subdir) + offset; --} -- --static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) --{ -- long incdec = value? 1: -1; -- -- entry->val = value; -- info->swapped += incdec; -- if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { -- struct page *page = kmap_atomic_to_page(entry); -- set_page_private(page, page_private(page) + incdec); -- } --} -- --/* -- * shmem_swp_alloc - get the position of the swap entry for the page. -- * If it does not exist allocate the entry. -- * -- * @info: info structure for the inode -- * @index: index of the page to find -- * @sgp: check and recheck i_size? skip allocation? -- */ --static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp) --{ -- struct inode *inode = &info->vfs_inode; -- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); -- struct page *page = NULL; -- swp_entry_t *entry; -- -- if (sgp != SGP_WRITE && -- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) -- return ERR_PTR(-EINVAL); -- -- while (!(entry = shmem_swp_entry(info, index, &page))) { -- if (sgp == SGP_READ) -- return shmem_swp_map(ZERO_PAGE(0)); -- /* -- * Test free_blocks against 1 not 0, since we have 1 data -- * page (and perhaps indirect index pages) yet to allocate: -- * a waste to allocate index if we cannot allocate data. -- */ -- if (sbinfo->max_blocks) { -- spin_lock(&sbinfo->stat_lock); -- if (sbinfo->free_blocks <= 1) { -- spin_unlock(&sbinfo->stat_lock); -- return ERR_PTR(-ENOSPC); -- } -- sbinfo->free_blocks--; -- inode->i_blocks += BLOCKS_PER_PAGE; -- spin_unlock(&sbinfo->stat_lock); -- } -- -- spin_unlock(&info->lock); -- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); -- if (page) -- set_page_private(page, 0); -- spin_lock(&info->lock); -- -- if (!page) { -- shmem_free_blocks(inode, 1); -- return ERR_PTR(-ENOMEM); -- } -- if (sgp != SGP_WRITE && -- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { -- entry = ERR_PTR(-EINVAL); -- break; -- } -- if (info->next_index <= index) -- info->next_index = index + 1; -- } -- if (page) { -- /* another task gave its page, or truncated the file */ -- shmem_free_blocks(inode, 1); -- shmem_dir_free(page); -- } -- if (info->next_index <= index && !IS_ERR(entry)) -- info->next_index = index + 1; -- return entry; --} -- --/* -- * shmem_free_swp - free some swap entries in a directory -- * -- * @dir: pointer to the directory -- * @edir: pointer after last entry of the directory -- * @punch_lock: pointer to spinlock when needed for the holepunch case -- */ --static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, -- spinlock_t *punch_lock) --{ -- spinlock_t *punch_unlock = NULL; -- swp_entry_t *ptr; -- int freed = 0; -- -- for (ptr = dir; ptr < edir; ptr++) { -- if (ptr->val) { -- if (unlikely(punch_lock)) { -- punch_unlock = punch_lock; -- punch_lock = NULL; -- spin_lock(punch_unlock); -- if (!ptr->val) -- continue; -- } -- free_swap_and_cache(*ptr); -- *ptr = (swp_entry_t){0}; -- freed++; -- } -- } -- if (punch_unlock) -- spin_unlock(punch_unlock); -- return freed; --} -- --static int shmem_map_and_free_swp(struct page *subdir, int offset, -- int limit, struct page ***dir, spinlock_t *punch_lock) --{ -- swp_entry_t *ptr; -- int freed = 0; -- -- ptr = shmem_swp_map(subdir); -- for (; offset < limit; offset += LATENCY_LIMIT) { -- int size = limit - offset; -- if (size > LATENCY_LIMIT) -- size = LATENCY_LIMIT; -- freed += shmem_free_swp(ptr+offset, ptr+offset+size, -- punch_lock); -- if (need_resched()) { -- shmem_swp_unmap(ptr); -- if (*dir) { -- shmem_dir_unmap(*dir); -- *dir = NULL; -- } -- cond_resched(); -- ptr = shmem_swp_map(subdir); -- } -- } -- shmem_swp_unmap(ptr); -- return freed; --} -- --static void shmem_free_pages(struct list_head *next) --{ -- struct page *page; -- int freed = 0; -- -- do { -- page = container_of(next, struct page, lru); -- next = next->next; -- shmem_dir_free(page); -- freed++; -- if (freed >= LATENCY_LIMIT) { -- cond_resched(); -- freed = 0; -- } -- } while (next); --} -- --static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) --{ -- struct shmem_inode_info *info = SHMEM_I(inode); -- unsigned long idx; -- unsigned long size; -- unsigned long limit; -- unsigned long stage; -- unsigned long diroff; -- struct page **dir; -- struct page *topdir; -- struct page *middir; -- struct page *subdir; -- swp_entry_t *ptr; -- LIST_HEAD(pages_to_free); -- long nr_pages_to_free = 0; -- long nr_swaps_freed = 0; -- int offset; -- int freed; -- int punch_hole; -- spinlock_t *needs_lock; -- spinlock_t *punch_lock; -- unsigned long upper_limit; -- -- inode->i_ctime = inode->i_mtime = CURRENT_TIME; -- idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -- if (idx >= info->next_index) -- return; -- -- spin_lock(&info->lock); -- info->flags |= SHMEM_TRUNCATE; -- if (likely(end == (loff_t) -1)) { -- limit = info->next_index; -- upper_limit = SHMEM_MAX_INDEX; -- info->next_index = idx; -- needs_lock = NULL; -- punch_hole = 0; -- } else { -- if (end + 1 >= inode->i_size) { /* we may free a little more */ -- limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> -- PAGE_CACHE_SHIFT; -- upper_limit = SHMEM_MAX_INDEX; -- } else { -- limit = (end + 1) >> PAGE_CACHE_SHIFT; -- upper_limit = limit; -- } -- needs_lock = &info->lock; -- punch_hole = 1; -- } -- -- topdir = info->i_indirect; -- if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { -- info->i_indirect = NULL; -- nr_pages_to_free++; -- list_add(&topdir->lru, &pages_to_free); -- } -- spin_unlock(&info->lock); -- -- if (info->swapped && idx < SHMEM_NR_DIRECT) { -- ptr = info->i_direct; -- size = limit; -- if (size > SHMEM_NR_DIRECT) -- size = SHMEM_NR_DIRECT; -- nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); -- } -- -- /* -- * If there are no indirect blocks or we are punching a hole -- * below indirect blocks, nothing to be done. -- */ -- if (!topdir || limit <= SHMEM_NR_DIRECT) -- goto done2; -- -- /* -- * The truncation case has already dropped info->lock, and we're safe -- * because i_size and next_index have already been lowered, preventing -- * access beyond. But in the punch_hole case, we still need to take -- * the lock when updating the swap directory, because there might be -- * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or -- * shmem_writepage. However, whenever we find we can remove a whole -- * directory page (not at the misaligned start or end of the range), -- * we first NULLify its pointer in the level above, and then have no -- * need to take the lock when updating its contents: needs_lock and -- * punch_lock (either pointing to info->lock or NULL) manage this. -- */ -- -- upper_limit -= SHMEM_NR_DIRECT; -- limit -= SHMEM_NR_DIRECT; -- idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; -- offset = idx % ENTRIES_PER_PAGE; -- idx -= offset; -- -- dir = shmem_dir_map(topdir); -- stage = ENTRIES_PER_PAGEPAGE/2; -- if (idx < ENTRIES_PER_PAGEPAGE/2) { -- middir = topdir; -- diroff = idx/ENTRIES_PER_PAGE; -- } else { -- dir += ENTRIES_PER_PAGE/2; -- dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; -- while (stage <= idx) -- stage += ENTRIES_PER_PAGEPAGE; -- middir = *dir; -- if (*dir) { -- diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % -- ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; -- if (!diroff && !offset && upper_limit >= stage) { -- if (needs_lock) { -- spin_lock(needs_lock); -- *dir = NULL; -- spin_unlock(needs_lock); -- needs_lock = NULL; -- } else -- *dir = NULL; -- nr_pages_to_free++; -- list_add(&middir->lru, &pages_to_free); -- } -- shmem_dir_unmap(dir); -- dir = shmem_dir_map(middir); -- } else { -- diroff = 0; -- offset = 0; -- idx = stage; -- } -- } -- -- for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { -- if (unlikely(idx == stage)) { -- shmem_dir_unmap(dir); -- dir = shmem_dir_map(topdir) + -- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; -- while (!*dir) { -- dir++; -- idx += ENTRIES_PER_PAGEPAGE; -- if (idx >= limit) -- goto done1; -- } -- stage = idx + ENTRIES_PER_PAGEPAGE; -- middir = *dir; -- if (punch_hole) -- needs_lock = &info->lock; -- if (upper_limit >= stage) { -- if (needs_lock) { -- spin_lock(needs_lock); -- *dir = NULL; -- spin_unlock(needs_lock); -- needs_lock = NULL; -- } else -- *dir = NULL; -- nr_pages_to_free++; -- list_add(&middir->lru, &pages_to_free); -- } -- shmem_dir_unmap(dir); -- cond_resched(); -- dir = shmem_dir_map(middir); -- diroff = 0; -- } -- punch_lock = needs_lock; -- subdir = dir[diroff]; -- if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { -- if (needs_lock) { -- spin_lock(needs_lock); -- dir[diroff] = NULL; -- spin_unlock(needs_lock); -- punch_lock = NULL; -- } else -- dir[diroff] = NULL; -- nr_pages_to_free++; -- list_add(&subdir->lru, &pages_to_free); -- } -- if (subdir && page_private(subdir) /* has swap entries */) { -- size = limit - idx; -- if (size > ENTRIES_PER_PAGE) -- size = ENTRIES_PER_PAGE; -- freed = shmem_map_and_free_swp(subdir, -- offset, size, &dir, punch_lock); -- if (!dir) -- dir = shmem_dir_map(middir); -- nr_swaps_freed += freed; -- if (offset || punch_lock) { -- spin_lock(&info->lock); -- set_page_private(subdir, -- page_private(subdir) - freed); -- spin_unlock(&info->lock); -- } else -- BUG_ON(page_private(subdir) != freed); -- } -- offset = 0; -- } --done1: -- shmem_dir_unmap(dir); --done2: -- if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { -- /* -- * Call truncate_inode_pages again: racing shmem_unuse_inode -- * may have swizzled a page in from swap since vmtruncate or -- * generic_delete_inode did it, before we lowered next_index. -- * Also, though shmem_getpage checks i_size before adding to -- * cache, no recheck after: so fix the narrow window there too. -- * -- * Recalling truncate_inode_pages_range and unmap_mapping_range -- * every time for punch_hole (which never got a chance to clear -- * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive, -- * yet hardly ever necessary: try to optimize them out later. -- */ -- truncate_inode_pages_range(inode->i_mapping, start, end); -- if (punch_hole) -- unmap_mapping_range(inode->i_mapping, start, -- end - start, 1); -- } -- -- spin_lock(&info->lock); -- info->flags &= ~SHMEM_TRUNCATE; -- info->swapped -= nr_swaps_freed; -- if (nr_pages_to_free) -- shmem_free_blocks(inode, nr_pages_to_free); -- shmem_recalc_inode(inode); -- spin_unlock(&info->lock); -- -- /* -- * Empty swap vector directory pages to be freed? -- */ -- if (!list_empty(&pages_to_free)) { -- pages_to_free.prev->next = NULL; -- shmem_free_pages(pages_to_free.next); -- } --} -- --static void shmem_truncate(struct inode *inode) --{ -- shmem_truncate_range(inode, inode->i_size, (loff_t)-1); --} -- --static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) --{ -- struct inode *inode = dentry->d_inode; -- struct page *page = NULL; -- int error; -- -- if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { -- if (attr->ia_size < inode->i_size) { -- /* -- * If truncating down to a partial page, then -- * if that page is already allocated, hold it -- * in memory until the truncation is over, so -- * truncate_partial_page cannnot miss it were -- * it assigned to swap. -- */ -- if (attr->ia_size & (PAGE_CACHE_SIZE-1)) { -- (void) shmem_getpage(inode, -- attr->ia_size>>PAGE_CACHE_SHIFT, -- &page, SGP_READ, NULL); -- } -- /* -- * Reset SHMEM_PAGEIN flag so that shmem_truncate can -- * detect if any pages might have been added to cache -- * after truncate_inode_pages. But we needn't bother -- * if it's being fully truncated to zero-length: the -- * nrpages check is efficient enough in that case. -- */ -- if (attr->ia_size) { -- struct shmem_inode_info *info = SHMEM_I(inode); -- spin_lock(&info->lock); -- info->flags &= ~SHMEM_PAGEIN; -- spin_unlock(&info->lock); -- } -- } -- } -- -- error = inode_change_ok(inode, attr); -- if (!error) -- error = inode_setattr(inode, attr); --#ifdef CONFIG_TMPFS_POSIX_ACL -- if (!error && (attr->ia_valid & ATTR_MODE)) -- error = generic_acl_chmod(inode, &shmem_acl_ops); --#endif -- if (page) -- page_cache_release(page); -- return error; --} -- --static void shmem_delete_inode(struct inode *inode) --{ -- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); -- struct shmem_inode_info *info = SHMEM_I(inode); -- -- if (inode->i_op->truncate == shmem_truncate) { -- truncate_inode_pages(inode->i_mapping, 0); -- shmem_unacct_size(info->flags, inode->i_size); -- inode->i_size = 0; -- shmem_truncate(inode); -- if (!list_empty(&info->swaplist)) { -- spin_lock(&shmem_swaplist_lock); -- list_del_init(&info->swaplist); -- spin_unlock(&shmem_swaplist_lock); -- } -- } -- BUG_ON(inode->i_blocks); -- if (sbinfo->max_inodes) { -- spin_lock(&sbinfo->stat_lock); -- sbinfo->free_inodes++; -- spin_unlock(&sbinfo->stat_lock); -- } -- clear_inode(inode); --} -- --static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) --{ -- swp_entry_t *ptr; -- -- for (ptr = dir; ptr < edir; ptr++) { -- if (ptr->val == entry.val) -- return ptr - dir; -- } -- return -1; --} -- --static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) --{ -- struct inode *inode; -- unsigned long idx; -- unsigned long size; -- unsigned long limit; -- unsigned long stage; -- struct page **dir; -- struct page *subdir; -- swp_entry_t *ptr; -- int offset; -- -- idx = 0; -- ptr = info->i_direct; -- spin_lock(&info->lock); -- limit = info->next_index; -- size = limit; -- if (size > SHMEM_NR_DIRECT) -- size = SHMEM_NR_DIRECT; -- offset = shmem_find_swp(entry, ptr, ptr+size); -- if (offset >= 0) { -- shmem_swp_balance_unmap(); -- goto found; -- } -- if (!info->i_indirect) -- goto lost2; -- -- dir = shmem_dir_map(info->i_indirect); -- stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; -- -- for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { -- if (unlikely(idx == stage)) { -- shmem_dir_unmap(dir-1); -- dir = shmem_dir_map(info->i_indirect) + -- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; -- while (!*dir) { -- dir++; -- idx += ENTRIES_PER_PAGEPAGE; -- if (idx >= limit) -- goto lost1; -- } -- stage = idx + ENTRIES_PER_PAGEPAGE; -- subdir = *dir; -- shmem_dir_unmap(dir); -- dir = shmem_dir_map(subdir); -- } -- subdir = *dir; -- if (subdir && page_private(subdir)) { -- ptr = shmem_swp_map(subdir); -- size = limit - idx; -- if (size > ENTRIES_PER_PAGE) -- size = ENTRIES_PER_PAGE; -- offset = shmem_find_swp(entry, ptr, ptr+size); -- if (offset >= 0) { -- shmem_dir_unmap(dir); -- goto found; -- } -- shmem_swp_unmap(ptr); -- } -- } --lost1: -- shmem_dir_unmap(dir-1); --lost2: -- spin_unlock(&info->lock); -- return 0; --found: -- idx += offset; -- inode = &info->vfs_inode; -- if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) { -- info->flags |= SHMEM_PAGEIN; -- shmem_swp_set(info, ptr + offset, 0); -- } -- shmem_swp_unmap(ptr); -- spin_unlock(&info->lock); -- /* -- * Decrement swap count even when the entry is left behind: -- * try_to_unuse will skip over mms, then reincrement count. -- */ -- swap_free(entry); -- return 1; --} -- --/* -- * shmem_unuse() search for an eventually swapped out shmem page. -- */ --int shmem_unuse(swp_entry_t entry, struct page *page) --{ -- struct list_head *p, *next; -- struct shmem_inode_info *info; -- int found = 0; -- -- spin_lock(&shmem_swaplist_lock); -- list_for_each_safe(p, next, &shmem_swaplist) { -- info = list_entry(p, struct shmem_inode_info, swaplist); -- if (!info->swapped) -- list_del_init(&info->swaplist); -- else if (shmem_unuse_inode(info, entry, page)) { -- /* move head to start search for next from here */ -- list_move_tail(&shmem_swaplist, &info->swaplist); -- found = 1; -- break; -- } -- } -- spin_unlock(&shmem_swaplist_lock); -- return found; --} -- --/* -- * Move the page from the page cache to the swap cache. -- */ --static int shmem_writepage(struct page *page, struct writeback_control *wbc) --{ -- struct shmem_inode_info *info; -- swp_entry_t *entry, swap; -- struct address_space *mapping; -- unsigned long index; -- struct inode *inode; -- -- BUG_ON(!PageLocked(page)); -- /* -- * shmem_backing_dev_info's capabilities prevent regular writeback or -- * sync from ever calling shmem_writepage; but a stacking filesystem -- * may use the ->writepage of its underlying filesystem, in which case -- * we want to do nothing when that underlying filesystem is tmpfs -- * (writing out to swap is useful as a response to memory pressure, but -- * of no use to stabilize the data) - just redirty the page, unlock it -- * and claim success in this case. AOP_WRITEPAGE_ACTIVATE, and the -- * page_mapped check below, must be avoided unless we're in reclaim. -- */ -- if (!wbc->for_reclaim) { -- set_page_dirty(page); -- unlock_page(page); -- return 0; -- } -- BUG_ON(page_mapped(page)); -- -- mapping = page->mapping; -- index = page->index; -- inode = mapping->host; -- info = SHMEM_I(inode); -- if (info->flags & VM_LOCKED) -- goto redirty; -- swap = get_swap_page(); -- if (!swap.val) -- goto redirty; -- -- spin_lock(&info->lock); -- shmem_recalc_inode(inode); -- if (index >= info->next_index) { -- BUG_ON(!(info->flags & SHMEM_TRUNCATE)); -- goto unlock; -- } -- entry = shmem_swp_entry(info, index, NULL); -- BUG_ON(!entry); -- BUG_ON(entry->val); -- -- if (move_to_swap_cache(page, swap) == 0) { -- shmem_swp_set(info, entry, swap.val); -- shmem_swp_unmap(entry); -- spin_unlock(&info->lock); -- if (list_empty(&info->swaplist)) { -- spin_lock(&shmem_swaplist_lock); -- /* move instead of add in case we're racing */ -- list_move_tail(&info->swaplist, &shmem_swaplist); -- spin_unlock(&shmem_swaplist_lock); -- } -- unlock_page(page); -- return 0; -- } -- -- shmem_swp_unmap(entry); --unlock: -- spin_unlock(&info->lock); -- swap_free(swap); --redirty: -- set_page_dirty(page); -- return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */ --} -- --#ifdef CONFIG_NUMA --static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) --{ -- char *nodelist = strchr(value, ':'); -- int err = 1; -- -- if (nodelist) { -- /* NUL-terminate policy string */ -- *nodelist++ = '\0'; -- if (nodelist_parse(nodelist, *policy_nodes)) -- goto out; -- if (!nodes_subset(*policy_nodes, node_online_map)) -- goto out; -- } -- if (!strcmp(value, "default")) { -- *policy = MPOL_DEFAULT; -- /* Don't allow a nodelist */ -- if (!nodelist) -- err = 0; -- } else if (!strcmp(value, "prefer")) { -- *policy = MPOL_PREFERRED; -- /* Insist on a nodelist of one node only */ -- if (nodelist) { -- char *rest = nodelist; -- while (isdigit(*rest)) -- rest++; -- if (!*rest) -- err = 0; -- } -- } else if (!strcmp(value, "bind")) { -- *policy = MPOL_BIND; -- /* Insist on a nodelist */ -- if (nodelist) -- err = 0; -- } else if (!strcmp(value, "interleave")) { -- *policy = MPOL_INTERLEAVE; -- /* Default to nodes online if no nodelist */ -- if (!nodelist) -- *policy_nodes = node_online_map; -- err = 0; -- } --out: -- /* Restore string for error message */ -- if (nodelist) -- *--nodelist = ':'; -- return err; --} -- --static struct page *shmem_swapin_async(struct shared_policy *p, -- swp_entry_t entry, unsigned long idx) --{ -- struct page *page; -- struct vm_area_struct pvma; -- -- /* Create a pseudo vma that just contains the policy */ -- memset(&pvma, 0, sizeof(struct vm_area_struct)); -- pvma.vm_end = PAGE_SIZE; -- pvma.vm_pgoff = idx; -- pvma.vm_policy = mpol_shared_policy_lookup(p, idx); -- page = read_swap_cache_async(entry, &pvma, 0); -- mpol_free(pvma.vm_policy); -- return page; --} -- --struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry, -- unsigned long idx) --{ -- struct shared_policy *p = &info->policy; -- int i, num; -- struct page *page; -- unsigned long offset; -- -- num = valid_swaphandles(entry, &offset); -- for (i = 0; i < num; offset++, i++) { -- page = shmem_swapin_async(p, -- swp_entry(swp_type(entry), offset), idx); -- if (!page) -- break; -- page_cache_release(page); -- } -- lru_add_drain(); /* Push any new pages onto the LRU now */ -- return shmem_swapin_async(p, entry, idx); --} -- --static struct page * --shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, -- unsigned long idx) --{ -- struct vm_area_struct pvma; -- struct page *page; -- -- memset(&pvma, 0, sizeof(struct vm_area_struct)); -- pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); -- pvma.vm_pgoff = idx; -- pvma.vm_end = PAGE_SIZE; -- page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0); -- mpol_free(pvma.vm_policy); -- return page; --} --#else --static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) --{ -- return 1; --} -- --static inline struct page * --shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) --{ -- swapin_readahead(entry, 0, NULL); -- return read_swap_cache_async(entry, NULL, 0); --} -- --static inline struct page * --shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) --{ -- return alloc_page(gfp | __GFP_ZERO); --} --#endif -- --/* -- * shmem_getpage - either get the page from swap or allocate a new one -- * -- * If we allocate a new one we do not mark it dirty. That's up to the -- * vm. If we swap it in we mark it dirty since we also free the swap -- * entry since a page cannot live in both the swap and page cache -- */ --static int shmem_getpage(struct inode *inode, unsigned long idx, -- struct page **pagep, enum sgp_type sgp, int *type) --{ -- struct address_space *mapping = inode->i_mapping; -- struct shmem_inode_info *info = SHMEM_I(inode); -- struct shmem_sb_info *sbinfo; -- struct page *filepage = *pagep; -- struct page *swappage; -- swp_entry_t *entry; -- swp_entry_t swap; -- int error; -- -- if (idx >= SHMEM_MAX_INDEX) -- return -EFBIG; -- /* -- * Normally, filepage is NULL on entry, and either found -- * uptodate immediately, or allocated and zeroed, or read -- * in under swappage, which is then assigned to filepage. -- * But shmem_prepare_write passes in a locked filepage, -- * which may be found not uptodate by other callers too, -- * and may need to be copied from the swappage read in. -- */ --repeat: -- if (!filepage) -- filepage = find_lock_page(mapping, idx); -- if (filepage && PageUptodate(filepage)) -- goto done; -- error = 0; -- if (sgp == SGP_QUICK) -- goto failed; -- -- spin_lock(&info->lock); -- shmem_recalc_inode(inode); -- entry = shmem_swp_alloc(info, idx, sgp); -- if (IS_ERR(entry)) { -- spin_unlock(&info->lock); -- error = PTR_ERR(entry); -- goto failed; -- } -- swap = *entry; -- -- if (swap.val) { -- /* Look it up and read it in.. */ -- swappage = lookup_swap_cache(swap); -- if (!swappage) { -- shmem_swp_unmap(entry); -- /* here we actually do the io */ -- if (type && *type == VM_FAULT_MINOR) { -- __count_vm_event(PGMAJFAULT); -- *type = VM_FAULT_MAJOR; -- } -- spin_unlock(&info->lock); -- swappage = shmem_swapin(info, swap, idx); -- if (!swappage) { -- spin_lock(&info->lock); -- entry = shmem_swp_alloc(info, idx, sgp); -- if (IS_ERR(entry)) -- error = PTR_ERR(entry); -- else { -- if (entry->val == swap.val) -- error = -ENOMEM; -- shmem_swp_unmap(entry); -- } -- spin_unlock(&info->lock); -- if (error) -- goto failed; -- goto repeat; -- } -- wait_on_page_locked(swappage); -- page_cache_release(swappage); -- goto repeat; -- } -- -- /* We have to do this with page locked to prevent races */ -- if (TestSetPageLocked(swappage)) { -- shmem_swp_unmap(entry); -- spin_unlock(&info->lock); -- wait_on_page_locked(swappage); -- page_cache_release(swappage); -- goto repeat; -- } -- if (PageWriteback(swappage)) { -- shmem_swp_unmap(entry); -- spin_unlock(&info->lock); -- wait_on_page_writeback(swappage); -- unlock_page(swappage); -- page_cache_release(swappage); -- goto repeat; -- } -- if (!PageUptodate(swappage)) { -- shmem_swp_unmap(entry); -- spin_unlock(&info->lock); -- unlock_page(swappage); -- page_cache_release(swappage); -- error = -EIO; -- goto failed; -- } -- -- if (filepage) { -- shmem_swp_set(info, entry, 0); -- shmem_swp_unmap(entry); -- delete_from_swap_cache(swappage); -- spin_unlock(&info->lock); -- copy_highpage(filepage, swappage); -- unlock_page(swappage); -- page_cache_release(swappage); -- flush_dcache_page(filepage); -- SetPageUptodate(filepage); -- set_page_dirty(filepage); -- swap_free(swap); -- } else if (!(error = move_from_swap_cache( -- swappage, idx, mapping))) { -- info->flags |= SHMEM_PAGEIN; -- shmem_swp_set(info, entry, 0); -- shmem_swp_unmap(entry); -- spin_unlock(&info->lock); -- filepage = swappage; -- swap_free(swap); -- } else { -- shmem_swp_unmap(entry); -- spin_unlock(&info->lock); -- unlock_page(swappage); -- page_cache_release(swappage); -- if (error == -ENOMEM) { -- /* let kswapd refresh zone for GFP_ATOMICs */ -- congestion_wait(WRITE, HZ/50); -- } -- goto repeat; -- } -- } else if (sgp == SGP_READ && !filepage) { -- shmem_swp_unmap(entry); -- filepage = find_get_page(mapping, idx); -- if (filepage && -- (!PageUptodate(filepage) || TestSetPageLocked(filepage))) { -- spin_unlock(&info->lock); -- wait_on_page_locked(filepage); -- page_cache_release(filepage); -- filepage = NULL; -- goto repeat; -- } -- spin_unlock(&info->lock); -- } else { -- shmem_swp_unmap(entry); -- sbinfo = SHMEM_SB(inode->i_sb); -- if (sbinfo->max_blocks) { -- spin_lock(&sbinfo->stat_lock); -- if (sbinfo->free_blocks == 0 || -- shmem_acct_block(info->flags)) { -- spin_unlock(&sbinfo->stat_lock); -- spin_unlock(&info->lock); -- error = -ENOSPC; -- goto failed; -- } -- sbinfo->free_blocks--; -- inode->i_blocks += BLOCKS_PER_PAGE; -- spin_unlock(&sbinfo->stat_lock); -- } else if (shmem_acct_block(info->flags)) { -- spin_unlock(&info->lock); -- error = -ENOSPC; -- goto failed; -- } -- -- if (!filepage) { -- spin_unlock(&info->lock); -- filepage = shmem_alloc_page(mapping_gfp_mask(mapping), -- info, -- idx); -- if (!filepage) { -- shmem_unacct_blocks(info->flags, 1); -- shmem_free_blocks(inode, 1); -- error = -ENOMEM; -- goto failed; -- } -- -- spin_lock(&info->lock); -- entry = shmem_swp_alloc(info, idx, sgp); -- if (IS_ERR(entry)) -- error = PTR_ERR(entry); -- else { -- swap = *entry; -- shmem_swp_unmap(entry); -- } -- if (error || swap.val || 0 != add_to_page_cache_lru( -- filepage, mapping, idx, GFP_ATOMIC)) { -- spin_unlock(&info->lock); -- page_cache_release(filepage); -- shmem_unacct_blocks(info->flags, 1); -- shmem_free_blocks(inode, 1); -- filepage = NULL; -- if (error) -- goto failed; -- goto repeat; -- } -- info->flags |= SHMEM_PAGEIN; -- } -- -- info->alloced++; -- spin_unlock(&info->lock); -- flush_dcache_page(filepage); -- SetPageUptodate(filepage); -- } --done: -- if (*pagep != filepage) { -- unlock_page(filepage); -- *pagep = filepage; -- } -- return 0; -- --failed: -- if (*pagep != filepage) { -- unlock_page(filepage); -- page_cache_release(filepage); -- } -- return error; --} -- --static struct page *shmem_nopage(struct vm_area_struct *vma, -- unsigned long address, int *type) --{ -- struct inode *inode = vma->vm_file->f_path.dentry->d_inode; -- struct page *page = NULL; -- unsigned long idx; -- int error; -- -- idx = (address - vma->vm_start) >> PAGE_SHIFT; -- idx += vma->vm_pgoff; -- idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; -- if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) -- return NOPAGE_SIGBUS; -- -- error = shmem_getpage(inode, idx, &page, SGP_CACHE, type); -- if (error) -- return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; -- -- mark_page_accessed(page); -- return page; --} -- --static int shmem_populate(struct vm_area_struct *vma, -- unsigned long addr, unsigned long len, -- pgprot_t prot, unsigned long pgoff, int nonblock) --{ -- struct inode *inode = vma->vm_file->f_path.dentry->d_inode; -- struct mm_struct *mm = vma->vm_mm; -- enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE; -- unsigned long size; -- -- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; -- if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) -- return -EINVAL; -- -- while ((long) len > 0) { -- struct page *page = NULL; -- int err; -- /* -- * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE -- */ -- err = shmem_getpage(inode, pgoff, &page, sgp, NULL); -- if (err) -- return err; -- /* Page may still be null, but only if nonblock was set. */ -- if (page) { -- mark_page_accessed(page); -- err = install_page(mm, vma, addr, page, prot); -- if (err) { -- page_cache_release(page); -- return err; -- } -- } else if (vma->vm_flags & VM_NONLINEAR) { -- /* No page was found just because we can't read it in -- * now (being here implies nonblock != 0), but the page -- * may exist, so set the PTE to fault it in later. */ -- err = install_file_pte(mm, vma, addr, pgoff, prot); -- if (err) -- return err; -- } -- -- len -= PAGE_SIZE; -- addr += PAGE_SIZE; -- pgoff++; -- } -- return 0; --} -- --#ifdef CONFIG_NUMA --int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) --{ -- struct inode *i = vma->vm_file->f_path.dentry->d_inode; -- return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); --} -- --struct mempolicy * --shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) --{ -- struct inode *i = vma->vm_file->f_path.dentry->d_inode; -- unsigned long idx; -- -- idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; -- return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); --} --#endif -- --int shmem_lock(struct file *file, int lock, struct user_struct *user) --{ -- struct inode *inode = file->f_path.dentry->d_inode; -- struct shmem_inode_info *info = SHMEM_I(inode); -- int retval = -ENOMEM; -- -- spin_lock(&info->lock); -- if (lock && !(info->flags & VM_LOCKED)) { -- if (!user_shm_lock(inode->i_size, user)) -- goto out_nomem; -- info->flags |= VM_LOCKED; -- } -- if (!lock && (info->flags & VM_LOCKED) && user) { -- user_shm_unlock(inode->i_size, user); -- info->flags &= ~VM_LOCKED; -- } -- retval = 0; --out_nomem: -- spin_unlock(&info->lock); -- return retval; --} -- --static int shmem_mmap(struct file *file, struct vm_area_struct *vma) --{ -- file_accessed(file); -- vma->vm_ops = &shmem_vm_ops; -- return 0; --} -- --static struct inode * --shmem_get_inode(struct super_block *sb, int mode, dev_t dev) --{ -- struct inode *inode; -- struct shmem_inode_info *info; -- struct shmem_sb_info *sbinfo = SHMEM_SB(sb); -- -- if (sbinfo->max_inodes) { -- spin_lock(&sbinfo->stat_lock); -- if (!sbinfo->free_inodes) { -- spin_unlock(&sbinfo->stat_lock); -- return NULL; -- } -- sbinfo->free_inodes--; -- spin_unlock(&sbinfo->stat_lock); -- } -- -- inode = new_inode(sb); -- if (inode) { -- inode->i_mode = mode; -- inode->i_uid = current->fsuid; -- inode->i_gid = current->fsgid; -- inode->i_blocks = 0; -- inode->i_mapping->a_ops = &shmem_aops; -- inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; -- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; -- inode->i_generation = get_seconds(); -- info = SHMEM_I(inode); -- memset(info, 0, (char *)inode - (char *)info); -- spin_lock_init(&info->lock); -- INIT_LIST_HEAD(&info->swaplist); -- -- switch (mode & S_IFMT) { -- default: -- inode->i_op = &shmem_special_inode_operations; -- init_special_inode(inode, mode, dev); -- break; -- case S_IFREG: -- inode->i_op = &shmem_inode_operations; -- inode->i_fop = &shmem_file_operations; -- mpol_shared_policy_init(&info->policy, sbinfo->policy, -- &sbinfo->policy_nodes); -- break; -- case S_IFDIR: -- inc_nlink(inode); -- /* Some things misbehave if size == 0 on a directory */ -- inode->i_size = 2 * BOGO_DIRENT_SIZE; -- inode->i_op = &shmem_dir_inode_operations; -- inode->i_fop = &simple_dir_operations; -- break; -- case S_IFLNK: -- /* -- * Must not load anything in the rbtree, -- * mpol_free_shared_policy will not be called. -- */ -- mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, -- NULL); -- break; -- } -- } else if (sbinfo->max_inodes) { -- spin_lock(&sbinfo->stat_lock); -- sbinfo->free_inodes++; -- spin_unlock(&sbinfo->stat_lock); -- } -- return inode; --} -- --#ifdef CONFIG_TMPFS --static const struct inode_operations shmem_symlink_inode_operations; --static const struct inode_operations shmem_symlink_inline_operations; -- --/* -- * Normally tmpfs makes no use of shmem_prepare_write, but it -- * lets a tmpfs file be used read-write below the loop driver. -- */ --static int --shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) --{ -- struct inode *inode = page->mapping->host; -- return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL); --} -- --static ssize_t --shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) --{ -- struct inode *inode = file->f_path.dentry->d_inode; -- loff_t pos; -- unsigned long written; -- ssize_t err; -- -- if ((ssize_t) count < 0) -- return -EINVAL; -- -- if (!access_ok(VERIFY_READ, buf, count)) -- return -EFAULT; -- -- mutex_lock(&inode->i_mutex); -- -- pos = *ppos; -- written = 0; -- -- err = generic_write_checks(file, &pos, &count, 0); -- if (err || !count) -- goto out; -- -- err = remove_suid(file->f_path.dentry); -- if (err) -- goto out; -- -- inode->i_ctime = inode->i_mtime = CURRENT_TIME; -- -- do { -- struct page *page = NULL; -- unsigned long bytes, index, offset; -- char *kaddr; -- int left; -- -- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ -- index = pos >> PAGE_CACHE_SHIFT; -- bytes = PAGE_CACHE_SIZE - offset; -- if (bytes > count) -- bytes = count; -- -- /* -- * We don't hold page lock across copy from user - -- * what would it guard against? - so no deadlock here. -- * But it still may be a good idea to prefault below. -- */ -- -- err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL); -- if (err) -- break; -- -- left = bytes; -- if (PageHighMem(page)) { -- volatile unsigned char dummy; -- __get_user(dummy, buf); -- __get_user(dummy, buf + bytes - 1); -- -- kaddr = kmap_atomic(page, KM_USER0); -- left = __copy_from_user_inatomic(kaddr + offset, -- buf, bytes); -- kunmap_atomic(kaddr, KM_USER0); -- } -- if (left) { -- kaddr = kmap(page); -- left = __copy_from_user(kaddr + offset, buf, bytes); -- kunmap(page); -- } -- -- written += bytes; -- count -= bytes; -- pos += bytes; -- buf += bytes; -- if (pos > inode->i_size) -- i_size_write(inode, pos); -- -- flush_dcache_page(page); -- set_page_dirty(page); -- mark_page_accessed(page); -- page_cache_release(page); -- -- if (left) { -- pos -= left; -- written -= left; -- err = -EFAULT; -- break; -- } -- -- /* -- * Our dirty pages are not counted in nr_dirty, -- * and we do not attempt to balance dirty pages. -- */ -- -- cond_resched(); -- } while (count); -- -- *ppos = pos; -- if (written) -- err = written; --out: -- mutex_unlock(&inode->i_mutex); -- return err; --} -- --static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) --{ -- struct inode *inode = filp->f_path.dentry->d_inode; -- struct address_space *mapping = inode->i_mapping; -- unsigned long index, offset; -- -- index = *ppos >> PAGE_CACHE_SHIFT; -- offset = *ppos & ~PAGE_CACHE_MASK; -- -- for (;;) { -- struct page *page = NULL; -- unsigned long end_index, nr, ret; -- loff_t i_size = i_size_read(inode); -- -- end_index = i_size >> PAGE_CACHE_SHIFT; -- if (index > end_index) -- break; -- if (index == end_index) { -- nr = i_size & ~PAGE_CACHE_MASK; -- if (nr <= offset) -- break; -- } -- -- desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL); -- if (desc->error) { -- if (desc->error == -EINVAL) -- desc->error = 0; -- break; -- } -- -- /* -- * We must evaluate after, since reads (unlike writes) -- * are called without i_mutex protection against truncate -- */ -- nr = PAGE_CACHE_SIZE; -- i_size = i_size_read(inode); -- end_index = i_size >> PAGE_CACHE_SHIFT; -- if (index == end_index) { -- nr = i_size & ~PAGE_CACHE_MASK; -- if (nr <= offset) { -- if (page) -- page_cache_release(page); -- break; -- } -- } -- nr -= offset; -- -- if (page) { -- /* -- * If users can be writing to this page using arbitrary -- * virtual addresses, take care about potential aliasing -- * before reading the page on the kernel side. -- */ -- if (mapping_writably_mapped(mapping)) -- flush_dcache_page(page); -- /* -- * Mark the page accessed if we read the beginning. -- */ -- if (!offset) -- mark_page_accessed(page); -- } else { -- page = ZERO_PAGE(0); -- page_cache_get(page); -- } -- -- /* -- * Ok, we have the page, and it's up-to-date, so -- * now we can copy it to user space... -- * -- * The actor routine returns how many bytes were actually used.. -- * NOTE! This may not be the same as how much of a user buffer -- * we filled up (we may be padding etc), so we can only update -- * "pos" here (the actor routine has to update the user buffer -- * pointers and the remaining count). -- */ -- ret = actor(desc, page, offset, nr); -- offset += ret; -- index += offset >> PAGE_CACHE_SHIFT; -- offset &= ~PAGE_CACHE_MASK; -- -- page_cache_release(page); -- if (ret != nr || !desc->count) -- break; -- -- cond_resched(); -- } -- -- *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; -- file_accessed(filp); --} -- --static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) --{ -- read_descriptor_t desc; -- -- if ((ssize_t) count < 0) -- return -EINVAL; -- if (!access_ok(VERIFY_WRITE, buf, count)) -- return -EFAULT; -- if (!count) -- return 0; -- -- desc.written = 0; -- desc.count = count; -- desc.arg.buf = buf; -- desc.error = 0; -- -- do_shmem_file_read(filp, ppos, &desc, file_read_actor); -- if (desc.written) -- return desc.written; -- return desc.error; --} -- --static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos, -- size_t count, read_actor_t actor, void *target) --{ -- read_descriptor_t desc; -- -- if (!count) -- return 0; -- -- desc.written = 0; -- desc.count = count; -- desc.arg.data = target; -- desc.error = 0; -- -- do_shmem_file_read(in_file, ppos, &desc, actor); -- if (desc.written) -- return desc.written; -- return desc.error; --} -- --static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) --{ -- struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); -- -- buf->f_type = TMPFS_MAGIC; -- buf->f_bsize = PAGE_CACHE_SIZE; -- buf->f_namelen = NAME_MAX; -- spin_lock(&sbinfo->stat_lock); -- if (sbinfo->max_blocks) { -- buf->f_blocks = sbinfo->max_blocks; -- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; -- } -- if (sbinfo->max_inodes) { -- buf->f_files = sbinfo->max_inodes; -- buf->f_ffree = sbinfo->free_inodes; -- } -- /* else leave those fields 0 like simple_statfs */ -- spin_unlock(&sbinfo->stat_lock); -- return 0; --} -- --/* -- * File creation. Allocate an inode, and we're done.. -- */ --static int --shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) --{ -- struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); -- int error = -ENOSPC; -- -- if (inode) { -- error = security_inode_init_security(inode, dir, NULL, NULL, -- NULL); -- if (error) { -- if (error != -EOPNOTSUPP) { -- iput(inode); -- return error; -- } -- } -- error = shmem_acl_init(inode, dir); -- if (error) { -- iput(inode); -- return error; -- } -- if (dir->i_mode & S_ISGID) { -- inode->i_gid = dir->i_gid; -- if (S_ISDIR(mode)) -- inode->i_mode |= S_ISGID; -- } -- dir->i_size += BOGO_DIRENT_SIZE; -- dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- d_instantiate(dentry, inode); -- dget(dentry); /* Extra count - pin the dentry in core */ -- } -- return error; --} -- --static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode) --{ -- int error; -- -- if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) -- return error; -- inc_nlink(dir); -- return 0; --} -- --static int shmem_create(struct inode *dir, struct dentry *dentry, int mode, -- struct nameidata *nd) --{ -- return shmem_mknod(dir, dentry, mode | S_IFREG, 0); --} -- --/* -- * Link a file.. -- */ --static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) --{ -- struct inode *inode = old_dentry->d_inode; -- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); -- -- /* -- * No ordinary (disk based) filesystem counts links as inodes; -- * but each new link needs a new dentry, pinning lowmem, and -- * tmpfs dentries cannot be pruned until they are unlinked. -- */ -- if (sbinfo->max_inodes) { -- spin_lock(&sbinfo->stat_lock); -- if (!sbinfo->free_inodes) { -- spin_unlock(&sbinfo->stat_lock); -- return -ENOSPC; -- } -- sbinfo->free_inodes--; -- spin_unlock(&sbinfo->stat_lock); -- } -- -- dir->i_size += BOGO_DIRENT_SIZE; -- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- inc_nlink(inode); -- atomic_inc(&inode->i_count); /* New dentry reference */ -- dget(dentry); /* Extra pinning count for the created dentry */ -- d_instantiate(dentry, inode); -- return 0; --} -- --static int shmem_unlink(struct inode *dir, struct dentry *dentry) --{ -- struct inode *inode = dentry->d_inode; -- -- if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) { -- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); -- if (sbinfo->max_inodes) { -- spin_lock(&sbinfo->stat_lock); -- sbinfo->free_inodes++; -- spin_unlock(&sbinfo->stat_lock); -- } -- } -- -- dir->i_size -= BOGO_DIRENT_SIZE; -- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- drop_nlink(inode); -- dput(dentry); /* Undo the count from "create" - this does all the work */ -- return 0; --} -- --static int shmem_rmdir(struct inode *dir, struct dentry *dentry) --{ -- if (!simple_empty(dentry)) -- return -ENOTEMPTY; -- -- drop_nlink(dentry->d_inode); -- drop_nlink(dir); -- return shmem_unlink(dir, dentry); --} -- --/* -- * The VFS layer already does all the dentry stuff for rename, -- * we just have to decrement the usage count for the target if -- * it exists so that the VFS layer correctly free's it when it -- * gets overwritten. -- */ --static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) --{ -- struct inode *inode = old_dentry->d_inode; -- int they_are_dirs = S_ISDIR(inode->i_mode); -- -- if (!simple_empty(new_dentry)) -- return -ENOTEMPTY; -- -- if (new_dentry->d_inode) { -- (void) shmem_unlink(new_dir, new_dentry); -- if (they_are_dirs) -- drop_nlink(old_dir); -- } else if (they_are_dirs) { -- drop_nlink(old_dir); -- inc_nlink(new_dir); -- } -- -- old_dir->i_size -= BOGO_DIRENT_SIZE; -- new_dir->i_size += BOGO_DIRENT_SIZE; -- old_dir->i_ctime = old_dir->i_mtime = -- new_dir->i_ctime = new_dir->i_mtime = -- inode->i_ctime = CURRENT_TIME; -- return 0; --} -- --static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname) --{ -- int error; -- int len; -- struct inode *inode; -- struct page *page = NULL; -- char *kaddr; -- struct shmem_inode_info *info; -- -- len = strlen(symname) + 1; -- if (len > PAGE_CACHE_SIZE) -- return -ENAMETOOLONG; -- -- inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); -- if (!inode) -- return -ENOSPC; -- -- error = security_inode_init_security(inode, dir, NULL, NULL, -- NULL); -- if (error) { -- if (error != -EOPNOTSUPP) { -- iput(inode); -- return error; -- } -- error = 0; -- } -- -- info = SHMEM_I(inode); -- inode->i_size = len-1; -- if (len <= (char *)inode - (char *)info) { -- /* do it inline */ -- memcpy(info, symname, len); -- inode->i_op = &shmem_symlink_inline_operations; -- } else { -- error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); -- if (error) { -- iput(inode); -- return error; -- } -- inode->i_op = &shmem_symlink_inode_operations; -- kaddr = kmap_atomic(page, KM_USER0); -- memcpy(kaddr, symname, len); -- kunmap_atomic(kaddr, KM_USER0); -- set_page_dirty(page); -- page_cache_release(page); -- } -- if (dir->i_mode & S_ISGID) -- inode->i_gid = dir->i_gid; -- dir->i_size += BOGO_DIRENT_SIZE; -- dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- d_instantiate(dentry, inode); -- dget(dentry); -- return 0; --} -- --static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) --{ -- nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); -- return NULL; --} -- --static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) --{ -- struct page *page = NULL; -- int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); -- nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); -- return page; --} -- --static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) --{ -- if (!IS_ERR(nd_get_link(nd))) { -- struct page *page = cookie; -- kunmap(page); -- mark_page_accessed(page); -- page_cache_release(page); -- } --} -- --static const struct inode_operations shmem_symlink_inline_operations = { -- .readlink = generic_readlink, -- .follow_link = shmem_follow_link_inline, --}; -- --static const struct inode_operations shmem_symlink_inode_operations = { -- .truncate = shmem_truncate, -- .readlink = generic_readlink, -- .follow_link = shmem_follow_link, -- .put_link = shmem_put_link, --}; -- --#ifdef CONFIG_TMPFS_POSIX_ACL --/** -- * Superblocks without xattr inode operations will get security.* xattr -- * support from the VFS "for free". As soon as we have any other xattrs -- * like ACLs, we also need to implement the security.* handlers at -- * filesystem level, though. -- */ -- --static size_t shmem_xattr_security_list(struct inode *inode, char *list, -- size_t list_len, const char *name, -- size_t name_len) --{ -- return security_inode_listsecurity(inode, list, list_len); --} -- --static int shmem_xattr_security_get(struct inode *inode, const char *name, -- void *buffer, size_t size) --{ -- if (strcmp(name, "") == 0) -- return -EINVAL; -- return security_inode_getsecurity(inode, name, buffer, size, -- -EOPNOTSUPP); --} -- --static int shmem_xattr_security_set(struct inode *inode, const char *name, -- const void *value, size_t size, int flags) --{ -- if (strcmp(name, "") == 0) -- return -EINVAL; -- return security_inode_setsecurity(inode, name, value, size, flags); --} -- --static struct xattr_handler shmem_xattr_security_handler = { -- .prefix = XATTR_SECURITY_PREFIX, -- .list = shmem_xattr_security_list, -- .get = shmem_xattr_security_get, -- .set = shmem_xattr_security_set, --}; -- --static struct xattr_handler *shmem_xattr_handlers[] = { -- &shmem_xattr_acl_access_handler, -- &shmem_xattr_acl_default_handler, -- &shmem_xattr_security_handler, -- NULL --}; --#endif -- --static struct dentry *shmem_get_parent(struct dentry *child) --{ -- return ERR_PTR(-ESTALE); --} -- --static int shmem_match(struct inode *ino, void *vfh) --{ -- __u32 *fh = vfh; -- __u64 inum = fh[2]; -- inum = (inum << 32) | fh[1]; -- return ino->i_ino == inum && fh[0] == ino->i_generation; --} -- --static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) --{ -- struct dentry *de = NULL; -- struct inode *inode; -- __u32 *fh = vfh; -- __u64 inum = fh[2]; -- inum = (inum << 32) | fh[1]; -- -- inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); -- if (inode) { -- de = d_find_alias(inode); -- iput(inode); -- } -- -- return de? de: ERR_PTR(-ESTALE); --} -- --static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, -- int len, int type, -- int (*acceptable)(void *context, struct dentry *de), -- void *context) --{ -- if (len < 3) -- return ERR_PTR(-ESTALE); -- -- return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, -- context); --} -- --static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, -- int connectable) --{ -- struct inode *inode = dentry->d_inode; -- -- if (*len < 3) -- return 255; -- -- if (hlist_unhashed(&inode->i_hash)) { -- /* Unfortunately insert_inode_hash is not idempotent, -- * so as we hash inodes here rather than at creation -- * time, we need a lock to ensure we only try -- * to do it once -- */ -- static DEFINE_SPINLOCK(lock); -- spin_lock(&lock); -- if (hlist_unhashed(&inode->i_hash)) -- __insert_inode_hash(inode, -- inode->i_ino + inode->i_generation); -- spin_unlock(&lock); -- } -- -- fh[0] = inode->i_generation; -- fh[1] = inode->i_ino; -- fh[2] = ((__u64)inode->i_ino) >> 32; -- -- *len = 3; -- return 1; --} -- --static struct export_operations shmem_export_ops = { -- .get_parent = shmem_get_parent, -- .get_dentry = shmem_get_dentry, -- .encode_fh = shmem_encode_fh, -- .decode_fh = shmem_decode_fh, --}; -- --static int shmem_parse_options(char *options, int *mode, uid_t *uid, -- gid_t *gid, unsigned long *blocks, unsigned long *inodes, -- int *policy, nodemask_t *policy_nodes) --{ -- char *this_char, *value, *rest; -- -- while (options != NULL) { -- this_char = options; -- for (;;) { -- /* -- * NUL-terminate this option: unfortunately, -- * mount options form a comma-separated list, -- * but mpol's nodelist may also contain commas. -- */ -- options = strchr(options, ','); -- if (options == NULL) -- break; -- options++; -- if (!isdigit(*options)) { -- options[-1] = '\0'; -- break; -- } -- } -- if (!*this_char) -- continue; -- if ((value = strchr(this_char,'=')) != NULL) { -- *value++ = 0; -- } else { -- printk(KERN_ERR -- "tmpfs: No value for mount option '%s'\n", -- this_char); -- return 1; -- } -- -- if (!strcmp(this_char,"size")) { -- unsigned long long size; -- size = memparse(value,&rest); -- if (*rest == '%') { -- size <<= PAGE_SHIFT; -- size *= totalram_pages; -- do_div(size, 100); -- rest++; -- } -- if (*rest) -- goto bad_val; -- *blocks = size >> PAGE_CACHE_SHIFT; -- } else if (!strcmp(this_char,"nr_blocks")) { -- *blocks = memparse(value,&rest); -- if (*rest) -- goto bad_val; -- } else if (!strcmp(this_char,"nr_inodes")) { -- *inodes = memparse(value,&rest); -- if (*rest) -- goto bad_val; -- } else if (!strcmp(this_char,"mode")) { -- if (!mode) -- continue; -- *mode = simple_strtoul(value,&rest,8); -- if (*rest) -- goto bad_val; -- } else if (!strcmp(this_char,"uid")) { -- if (!uid) -- continue; -- *uid = simple_strtoul(value,&rest,0); -- if (*rest) -- goto bad_val; -- } else if (!strcmp(this_char,"gid")) { -- if (!gid) -- continue; -- *gid = simple_strtoul(value,&rest,0); -- if (*rest) -- goto bad_val; -- } else if (!strcmp(this_char,"mpol")) { -- if (shmem_parse_mpol(value,policy,policy_nodes)) -- goto bad_val; -- } else { -- printk(KERN_ERR "tmpfs: Bad mount option %s\n", -- this_char); -- return 1; -- } -- } -- return 0; -- --bad_val: -- printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", -- value, this_char); -- return 1; -- --} -- --static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) --{ -- struct shmem_sb_info *sbinfo = SHMEM_SB(sb); -- unsigned long max_blocks = sbinfo->max_blocks; -- unsigned long max_inodes = sbinfo->max_inodes; -- int policy = sbinfo->policy; -- nodemask_t policy_nodes = sbinfo->policy_nodes; -- unsigned long blocks; -- unsigned long inodes; -- int error = -EINVAL; -- -- if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, -- &max_inodes, &policy, &policy_nodes)) -- return error; -- -- spin_lock(&sbinfo->stat_lock); -- blocks = sbinfo->max_blocks - sbinfo->free_blocks; -- inodes = sbinfo->max_inodes - sbinfo->free_inodes; -- if (max_blocks < blocks) -- goto out; -- if (max_inodes < inodes) -- goto out; -- /* -- * Those tests also disallow limited->unlimited while any are in -- * use, so i_blocks will always be zero when max_blocks is zero; -- * but we must separately disallow unlimited->limited, because -- * in that case we have no record of how much is already in use. -- */ -- if (max_blocks && !sbinfo->max_blocks) -- goto out; -- if (max_inodes && !sbinfo->max_inodes) -- goto out; -- -- error = 0; -- sbinfo->max_blocks = max_blocks; -- sbinfo->free_blocks = max_blocks - blocks; -- sbinfo->max_inodes = max_inodes; -- sbinfo->free_inodes = max_inodes - inodes; -- sbinfo->policy = policy; -- sbinfo->policy_nodes = policy_nodes; --out: -- spin_unlock(&sbinfo->stat_lock); -- return error; --} --#endif -- --static void shmem_put_super(struct super_block *sb) --{ -- kfree(sb->s_fs_info); -- sb->s_fs_info = NULL; --} -- --static int shmem_fill_super(struct super_block *sb, -- void *data, int silent) --{ -- struct inode *inode; -- struct dentry *root; -- int mode = S_IRWXUGO | S_ISVTX; -- uid_t uid = current->fsuid; -- gid_t gid = current->fsgid; -- int err = -ENOMEM; -- struct shmem_sb_info *sbinfo; -- unsigned long blocks = 0; -- unsigned long inodes = 0; -- int policy = MPOL_DEFAULT; -- nodemask_t policy_nodes = node_online_map; -- --#ifdef CONFIG_TMPFS -- /* -- * Per default we only allow half of the physical ram per -- * tmpfs instance, limiting inodes to one per page of lowmem; -- * but the internal instance is left unlimited. -- */ -- if (!(sb->s_flags & MS_NOUSER)) { -- blocks = totalram_pages / 2; -- inodes = totalram_pages - totalhigh_pages; -- if (inodes > blocks) -- inodes = blocks; -- if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, -- &inodes, &policy, &policy_nodes)) -- return -EINVAL; -- } -- sb->s_export_op = &shmem_export_ops; --#else -- sb->s_flags |= MS_NOUSER; --#endif -- -- /* Round up to L1_CACHE_BYTES to resist false sharing */ -- sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info), -- L1_CACHE_BYTES), GFP_KERNEL); -- if (!sbinfo) -- return -ENOMEM; -- -- spin_lock_init(&sbinfo->stat_lock); -- sbinfo->max_blocks = blocks; -- sbinfo->free_blocks = blocks; -- sbinfo->max_inodes = inodes; -- sbinfo->free_inodes = inodes; -- sbinfo->policy = policy; -- sbinfo->policy_nodes = policy_nodes; -- -- sb->s_fs_info = sbinfo; -- sb->s_maxbytes = SHMEM_MAX_BYTES; -- sb->s_blocksize = PAGE_CACHE_SIZE; -- sb->s_blocksize_bits = PAGE_CACHE_SHIFT; -- sb->s_magic = TMPFS_MAGIC; -- sb->s_op = &shmem_ops; -- sb->s_time_gran = 1; --#ifdef CONFIG_TMPFS_POSIX_ACL -- sb->s_xattr = shmem_xattr_handlers; -- sb->s_flags |= MS_POSIXACL; --#endif -- -- inode = shmem_get_inode(sb, S_IFDIR | mode, 0); -- if (!inode) -- goto failed; -- inode->i_uid = uid; -- inode->i_gid = gid; -- root = d_alloc_root(inode); -- if (!root) -- goto failed_iput; -- sb->s_root = root; -- return 0; -- --failed_iput: -- iput(inode); --failed: -- shmem_put_super(sb); -- return err; --} -- --static struct kmem_cache *shmem_inode_cachep; -- --static struct inode *shmem_alloc_inode(struct super_block *sb) --{ -- struct shmem_inode_info *p; -- p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); -- if (!p) -- return NULL; -- return &p->vfs_inode; --} -- --static void shmem_destroy_inode(struct inode *inode) --{ -- if ((inode->i_mode & S_IFMT) == S_IFREG) { -- /* only struct inode is valid if it's an inline symlink */ -- mpol_free_shared_policy(&SHMEM_I(inode)->policy); -- } -- shmem_acl_destroy_inode(inode); -- kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); --} -- --static void init_once(void *foo, struct kmem_cache *cachep, -- unsigned long flags) --{ -- struct shmem_inode_info *p = (struct shmem_inode_info *) foo; -- -- inode_init_once(&p->vfs_inode); --#ifdef CONFIG_TMPFS_POSIX_ACL -- p->i_acl = NULL; -- p->i_default_acl = NULL; --#endif --} -- --static int init_inodecache(void) --{ -- shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", -- sizeof(struct shmem_inode_info), -- 0, 0, init_once, NULL); -- if (shmem_inode_cachep == NULL) -- return -ENOMEM; -- return 0; --} -- --static void destroy_inodecache(void) --{ -- kmem_cache_destroy(shmem_inode_cachep); --} -- --static const struct address_space_operations shmem_aops = { -- .writepage = shmem_writepage, -- .set_page_dirty = __set_page_dirty_no_writeback, --#ifdef CONFIG_TMPFS -- .prepare_write = shmem_prepare_write, -- .commit_write = simple_commit_write, --#endif -- .migratepage = migrate_page, --}; -- --static const struct file_operations shmem_file_operations = { -- .mmap = shmem_mmap, --#ifdef CONFIG_TMPFS -- .llseek = generic_file_llseek, -- .read = shmem_file_read, -- .write = shmem_file_write, -- .fsync = simple_sync_file, -- .sendfile = shmem_file_sendfile, --#endif --}; -- --static const struct inode_operations shmem_inode_operations = { -- .truncate = shmem_truncate, -- .setattr = shmem_notify_change, -- .truncate_range = shmem_truncate_range, --#ifdef CONFIG_TMPFS_POSIX_ACL -- .setxattr = generic_setxattr, -- .getxattr = generic_getxattr, -- .listxattr = generic_listxattr, -- .removexattr = generic_removexattr, -- .permission = shmem_permission, --#endif -- --}; -- --static const struct inode_operations shmem_dir_inode_operations = { --#ifdef CONFIG_TMPFS -- .create = shmem_create, -- .lookup = simple_lookup, -- .link = shmem_link, -- .unlink = shmem_unlink, -- .symlink = shmem_symlink, -- .mkdir = shmem_mkdir, -- .rmdir = shmem_rmdir, -- .mknod = shmem_mknod, -- .rename = shmem_rename, --#endif --#ifdef CONFIG_TMPFS_POSIX_ACL -- .setattr = shmem_notify_change, -- .setxattr = generic_setxattr, -- .getxattr = generic_getxattr, -- .listxattr = generic_listxattr, -- .removexattr = generic_removexattr, -- .permission = shmem_permission, --#endif --}; -- --static const struct inode_operations shmem_special_inode_operations = { --#ifdef CONFIG_TMPFS_POSIX_ACL -- .setattr = shmem_notify_change, -- .setxattr = generic_setxattr, -- .getxattr = generic_getxattr, -- .listxattr = generic_listxattr, -- .removexattr = generic_removexattr, -- .permission = shmem_permission, --#endif --}; -- --static const struct super_operations shmem_ops = { -- .alloc_inode = shmem_alloc_inode, -- .destroy_inode = shmem_destroy_inode, --#ifdef CONFIG_TMPFS -- .statfs = shmem_statfs, -- .remount_fs = shmem_remount_fs, --#endif -- .delete_inode = shmem_delete_inode, -- .drop_inode = generic_delete_inode, -- .put_super = shmem_put_super, --}; -- --static struct vm_operations_struct shmem_vm_ops = { -- .nopage = shmem_nopage, -- .populate = shmem_populate, --#ifdef CONFIG_NUMA -- .set_policy = shmem_set_policy, -- .get_policy = shmem_get_policy, --#endif --}; -- -- --static int shmem_get_sb(struct file_system_type *fs_type, -- int flags, const char *dev_name, void *data, struct vfsmount *mnt) --{ -- return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt); --} -- --static struct file_system_type tmpfs_fs_type = { -- .owner = THIS_MODULE, -- .name = "tmpfs", -- .get_sb = shmem_get_sb, -- .kill_sb = kill_litter_super, --}; --static struct vfsmount *shm_mnt; -- --static int __init init_tmpfs(void) --{ -- int error; -- -- error = init_inodecache(); -- if (error) -- goto out3; -- -- error = register_filesystem(&tmpfs_fs_type); -- if (error) { -- printk(KERN_ERR "Could not register tmpfs\n"); -- goto out2; -- } -- -- shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, -- tmpfs_fs_type.name, NULL); -- if (IS_ERR(shm_mnt)) { -- error = PTR_ERR(shm_mnt); -- printk(KERN_ERR "Could not kern_mount tmpfs\n"); -- goto out1; -- } -- return 0; -- --out1: -- unregister_filesystem(&tmpfs_fs_type); --out2: -- destroy_inodecache(); --out3: -- shm_mnt = ERR_PTR(error); -- return error; --} --module_init(init_tmpfs) -- --/* -- * shmem_file_setup - get an unlinked file living in tmpfs -- * -- * @name: name for dentry (to be seen in /proc//maps -- * @size: size to be set for the file -- * -- */ --struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) --{ -- int error; -- struct file *file; -- struct inode *inode; -- struct dentry *dentry, *root; -- struct qstr this; -- -- if (IS_ERR(shm_mnt)) -- return (void *)shm_mnt; -- -- if (size < 0 || size > SHMEM_MAX_BYTES) -- return ERR_PTR(-EINVAL); -- -- if (shmem_acct_size(flags, size)) -- return ERR_PTR(-ENOMEM); -- -- error = -ENOMEM; -- this.name = name; -- this.len = strlen(name); -- this.hash = 0; /* will go */ -- root = shm_mnt->mnt_root; -- dentry = d_alloc(root, &this); -- if (!dentry) -- goto put_memory; -- -- error = -ENFILE; -- file = get_empty_filp(); -- if (!file) -- goto put_dentry; -- -- error = -ENOSPC; -- inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); -- if (!inode) -- goto close_file; -- -- SHMEM_I(inode)->flags = flags & VM_ACCOUNT; -- d_instantiate(dentry, inode); -- inode->i_size = size; -- inode->i_nlink = 0; /* It is unlinked */ -- file->f_path.mnt = mntget(shm_mnt); -- file->f_path.dentry = dentry; -- file->f_mapping = inode->i_mapping; -- file->f_op = &shmem_file_operations; -- file->f_mode = FMODE_WRITE | FMODE_READ; -- return file; -- --close_file: -- put_filp(file); --put_dentry: -- dput(dentry); --put_memory: -- shmem_unacct_size(flags, size); -- return ERR_PTR(error); --} -- --/* -- * shmem_zero_setup - setup a shared anonymous mapping -- * -- * @vma: the vma to be mmapped is prepared by do_mmap_pgoff -- */ --int shmem_zero_setup(struct vm_area_struct *vma) --{ -- struct file *file; -- loff_t size = vma->vm_end - vma->vm_start; -- -- file = shmem_file_setup("dev/zero", size, vma->vm_flags); -- if (IS_ERR(file)) -- return PTR_ERR(file); -- -- if (vma->vm_file) -- fput(vma->vm_file); -- vma->vm_file = file; -- vma->vm_ops = &shmem_vm_ops; -- return 0; --} diff -Nurb linux-2.6.22-570/mm/slab.c linux-2.6.22-590/mm/slab.c --- linux-2.6.22-570/mm/slab.c 2008-03-20 13:25:46.000000000 -0400 +++ linux-2.6.22-590/mm/slab.c 2008-03-20 13:28:03.000000000 -0400 @@ -183503,2493 +179235,6 @@ diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c linux-2.6.22-590/net/ipv4/tcp_ip -EXPORT_SYMBOL(sysctl_local_port_range); EXPORT_SYMBOL(sysctl_tcp_low_latency); -diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig linux-2.6.22-590/net/ipv4/tcp_ipv4.c.orig ---- linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig 2008-03-20 13:25:40.000000000 -0400 -+++ linux-2.6.22-590/net/ipv4/tcp_ipv4.c.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,2483 +0,0 @@ --/* -- * INET An implementation of the TCP/IP protocol suite for the LINUX -- * operating system. INET is implemented using the BSD Socket -- * interface as the means of communication with the user level. -- * -- * Implementation of the Transmission Control Protocol(TCP). -- * -- * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ -- * -- * IPv4 specific functions -- * -- * -- * code split from: -- * linux/ipv4/tcp.c -- * linux/ipv4/tcp_input.c -- * linux/ipv4/tcp_output.c -- * -- * See tcp.c for author information -- * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public License -- * as published by the Free Software Foundation; either version -- * 2 of the License, or (at your option) any later version. -- */ -- --/* -- * Changes: -- * David S. Miller : New socket lookup architecture. -- * This code is dedicated to John Dyson. -- * David S. Miller : Change semantics of established hash, -- * half is devoted to TIME_WAIT sockets -- * and the rest go in the other half. -- * Andi Kleen : Add support for syncookies and fixed -- * some bugs: ip options weren't passed to -- * the TCP layer, missed a check for an -- * ACK bit. -- * Andi Kleen : Implemented fast path mtu discovery. -- * Fixed many serious bugs in the -- * request_sock handling and moved -- * most of it into the af independent code. -- * Added tail drop and some other bugfixes. -- * Added new listen semantics. -- * Mike McLagan : Routing by source -- * Juan Jose Ciarlante: ip_dynaddr bits -- * Andi Kleen: various fixes. -- * Vitaly E. Lavrov : Transparent proxy revived after year -- * coma. -- * Andi Kleen : Fix new listen. -- * Andi Kleen : Fix accept error reporting. -- * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which -- * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind -- * a single port at the same time. -- */ -- -- --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include --#include --#include -- --#include --#include -- --int sysctl_tcp_tw_reuse __read_mostly; --int sysctl_tcp_low_latency __read_mostly; -- --/* Check TCP sequence numbers in ICMP packets. */ --#define ICMP_MIN_LENGTH 8 -- --/* Socket used for sending RSTs */ --static struct socket *tcp_socket __read_mostly; -- --void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); -- --#ifdef CONFIG_TCP_MD5SIG --static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, -- __be32 addr); --static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, -- __be32 saddr, __be32 daddr, -- struct tcphdr *th, int protocol, -- int tcplen); --#endif -- --struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { -- .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), -- .lhash_users = ATOMIC_INIT(0), -- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), --}; -- --static int tcp_v4_get_port(struct sock *sk, unsigned short snum) --{ -- return inet_csk_get_port(&tcp_hashinfo, sk, snum, -- inet_csk_bind_conflict); --} -- --static void tcp_v4_hash(struct sock *sk) --{ -- inet_hash(&tcp_hashinfo, sk); --} -- --void tcp_unhash(struct sock *sk) --{ -- inet_unhash(&tcp_hashinfo, sk); --} -- --static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) --{ -- return secure_tcp_sequence_number(ip_hdr(skb)->daddr, -- ip_hdr(skb)->saddr, -- tcp_hdr(skb)->dest, -- tcp_hdr(skb)->source); --} -- --int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) --{ -- const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); -- struct tcp_sock *tp = tcp_sk(sk); -- -- /* With PAWS, it is safe from the viewpoint -- of data integrity. Even without PAWS it is safe provided sequence -- spaces do not overlap i.e. at data rates <= 80Mbit/sec. -- -- Actually, the idea is close to VJ's one, only timestamp cache is -- held not per host, but per port pair and TW bucket is used as state -- holder. -- -- If TW bucket has been already destroyed we fall back to VJ's scheme -- and use initial timestamp retrieved from peer table. -- */ -- if (tcptw->tw_ts_recent_stamp && -- (twp == NULL || (sysctl_tcp_tw_reuse && -- get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { -- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; -- if (tp->write_seq == 0) -- tp->write_seq = 1; -- tp->rx_opt.ts_recent = tcptw->tw_ts_recent; -- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; -- sock_hold(sktw); -- return 1; -- } -- -- return 0; --} -- --EXPORT_SYMBOL_GPL(tcp_twsk_unique); -- --/* This will initiate an outgoing connection. */ --int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) --{ -- struct inet_sock *inet = inet_sk(sk); -- struct tcp_sock *tp = tcp_sk(sk); -- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; -- struct rtable *rt; -- __be32 daddr, nexthop; -- int tmp; -- int err; -- -- if (addr_len < sizeof(struct sockaddr_in)) -- return -EINVAL; -- -- if (usin->sin_family != AF_INET) -- return -EAFNOSUPPORT; -- -- nexthop = daddr = usin->sin_addr.s_addr; -- if (inet->opt && inet->opt->srr) { -- if (!daddr) -- return -EINVAL; -- nexthop = inet->opt->faddr; -- } -- -- tmp = ip_route_connect(&rt, nexthop, inet->saddr, -- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, -- IPPROTO_TCP, -- inet->sport, usin->sin_port, sk, 1); -- if (tmp < 0) { -- if (tmp == -ENETUNREACH) -- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); -- return tmp; -- } -- -- if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { -- ip_rt_put(rt); -- return -ENETUNREACH; -- } -- -- if (!inet->opt || !inet->opt->srr) -- daddr = rt->rt_dst; -- -- if (!inet->saddr) -- inet->saddr = rt->rt_src; -- inet->rcv_saddr = inet->saddr; -- -- if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { -- /* Reset inherited state */ -- tp->rx_opt.ts_recent = 0; -- tp->rx_opt.ts_recent_stamp = 0; -- tp->write_seq = 0; -- } -- -- if (tcp_death_row.sysctl_tw_recycle && -- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { -- struct inet_peer *peer = rt_get_peer(rt); -- /* -- * VJ's idea. We save last timestamp seen from -- * the destination in peer table, when entering state -- * TIME-WAIT * and initialize rx_opt.ts_recent from it, -- * when trying new connection. -- */ -- if (peer != NULL && -- peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { -- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; -- tp->rx_opt.ts_recent = peer->tcp_ts; -- } -- } -- -- inet->dport = usin->sin_port; -- inet->daddr = daddr; -- -- inet_csk(sk)->icsk_ext_hdr_len = 0; -- if (inet->opt) -- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; -- -- tp->rx_opt.mss_clamp = 536; -- -- /* Socket identity is still unknown (sport may be zero). -- * However we set state to SYN-SENT and not releasing socket -- * lock select source port, enter ourselves into the hash tables and -- * complete initialization after this. -- */ -- tcp_set_state(sk, TCP_SYN_SENT); -- err = inet_hash_connect(&tcp_death_row, sk); -- if (err) -- goto failure; -- -- err = ip_route_newports(&rt, IPPROTO_TCP, -- inet->sport, inet->dport, sk); -- if (err) -- goto failure; -- -- /* OK, now commit destination to socket. */ -- sk->sk_gso_type = SKB_GSO_TCPV4; -- sk_setup_caps(sk, &rt->u.dst); -- -- if (!tp->write_seq) -- tp->write_seq = secure_tcp_sequence_number(inet->saddr, -- inet->daddr, -- inet->sport, -- usin->sin_port); -- -- inet->id = tp->write_seq ^ jiffies; -- -- err = tcp_connect(sk); -- rt = NULL; -- if (err) -- goto failure; -- -- return 0; -- --failure: -- /* -- * This unhashes the socket and releases the local port, -- * if necessary. -- */ -- tcp_set_state(sk, TCP_CLOSE); -- ip_rt_put(rt); -- sk->sk_route_caps = 0; -- inet->dport = 0; -- return err; --} -- --/* -- * This routine does path mtu discovery as defined in RFC1191. -- */ --static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) --{ -- struct dst_entry *dst; -- struct inet_sock *inet = inet_sk(sk); -- -- /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs -- * send out by Linux are always <576bytes so they should go through -- * unfragmented). -- */ -- if (sk->sk_state == TCP_LISTEN) -- return; -- -- /* We don't check in the destentry if pmtu discovery is forbidden -- * on this route. We just assume that no packet_to_big packets -- * are send back when pmtu discovery is not active. -- * There is a small race when the user changes this flag in the -- * route, but I think that's acceptable. -- */ -- if ((dst = __sk_dst_check(sk, 0)) == NULL) -- return; -- -- dst->ops->update_pmtu(dst, mtu); -- -- /* Something is about to be wrong... Remember soft error -- * for the case, if this connection will not able to recover. -- */ -- if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) -- sk->sk_err_soft = EMSGSIZE; -- -- mtu = dst_mtu(dst); -- -- if (inet->pmtudisc != IP_PMTUDISC_DONT && -- inet_csk(sk)->icsk_pmtu_cookie > mtu) { -- tcp_sync_mss(sk, mtu); -- -- /* Resend the TCP packet because it's -- * clear that the old packet has been -- * dropped. This is the new "fast" path mtu -- * discovery. -- */ -- tcp_simple_retransmit(sk); -- } /* else let the usual retransmit timer handle it */ --} -- --/* -- * This routine is called by the ICMP module when it gets some -- * sort of error condition. If err < 0 then the socket should -- * be closed and the error returned to the user. If err > 0 -- * it's just the icmp type << 8 | icmp code. After adjustment -- * header points to the first 8 bytes of the tcp header. We need -- * to find the appropriate port. -- * -- * The locking strategy used here is very "optimistic". When -- * someone else accesses the socket the ICMP is just dropped -- * and for some paths there is no check at all. -- * A more general error queue to queue errors for later handling -- * is probably better. -- * -- */ -- --void tcp_v4_err(struct sk_buff *skb, u32 info) --{ -- struct iphdr *iph = (struct iphdr *)skb->data; -- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); -- struct tcp_sock *tp; -- struct inet_sock *inet; -- const int type = icmp_hdr(skb)->type; -- const int code = icmp_hdr(skb)->code; -- struct sock *sk; -- __u32 seq; -- int err; -- -- if (skb->len < (iph->ihl << 2) + 8) { -- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); -- return; -- } -- -- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, -- th->source, inet_iif(skb)); -- if (!sk) { -- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); -- return; -- } -- if (sk->sk_state == TCP_TIME_WAIT) { -- inet_twsk_put(inet_twsk(sk)); -- return; -- } -- -- bh_lock_sock(sk); -- /* If too many ICMPs get dropped on busy -- * servers this needs to be solved differently. -- */ -- if (sock_owned_by_user(sk)) -- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); -- -- if (sk->sk_state == TCP_CLOSE) -- goto out; -- -- tp = tcp_sk(sk); -- seq = ntohl(th->seq); -- if (sk->sk_state != TCP_LISTEN && -- !between(seq, tp->snd_una, tp->snd_nxt)) { -- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); -- goto out; -- } -- -- switch (type) { -- case ICMP_SOURCE_QUENCH: -- /* Just silently ignore these. */ -- goto out; -- case ICMP_PARAMETERPROB: -- err = EPROTO; -- break; -- case ICMP_DEST_UNREACH: -- if (code > NR_ICMP_UNREACH) -- goto out; -- -- if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ -- if (!sock_owned_by_user(sk)) -- do_pmtu_discovery(sk, iph, info); -- goto out; -- } -- -- err = icmp_err_convert[code].errno; -- break; -- case ICMP_TIME_EXCEEDED: -- err = EHOSTUNREACH; -- break; -- default: -- goto out; -- } -- -- switch (sk->sk_state) { -- struct request_sock *req, **prev; -- case TCP_LISTEN: -- if (sock_owned_by_user(sk)) -- goto out; -- -- req = inet_csk_search_req(sk, &prev, th->dest, -- iph->daddr, iph->saddr); -- if (!req) -- goto out; -- -- /* ICMPs are not backlogged, hence we cannot get -- an established socket here. -- */ -- BUG_TRAP(!req->sk); -- -- if (seq != tcp_rsk(req)->snt_isn) { -- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); -- goto out; -- } -- -- /* -- * Still in SYN_RECV, just remove it silently. -- * There is no good way to pass the error to the newly -- * created socket, and POSIX does not want network -- * errors returned from accept(). -- */ -- inet_csk_reqsk_queue_drop(sk, req, prev); -- goto out; -- -- case TCP_SYN_SENT: -- case TCP_SYN_RECV: /* Cannot happen. -- It can f.e. if SYNs crossed. -- */ -- if (!sock_owned_by_user(sk)) { -- sk->sk_err = err; -- -- sk->sk_error_report(sk); -- -- tcp_done(sk); -- } else { -- sk->sk_err_soft = err; -- } -- goto out; -- } -- -- /* If we've already connected we will keep trying -- * until we time out, or the user gives up. -- * -- * rfc1122 4.2.3.9 allows to consider as hard errors -- * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, -- * but it is obsoleted by pmtu discovery). -- * -- * Note, that in modern internet, where routing is unreliable -- * and in each dark corner broken firewalls sit, sending random -- * errors ordered by their masters even this two messages finally lose -- * their original sense (even Linux sends invalid PORT_UNREACHs) -- * -- * Now we are in compliance with RFCs. -- * --ANK (980905) -- */ -- -- inet = inet_sk(sk); -- if (!sock_owned_by_user(sk) && inet->recverr) { -- sk->sk_err = err; -- sk->sk_error_report(sk); -- } else { /* Only an error on timeout */ -- sk->sk_err_soft = err; -- } -- --out: -- bh_unlock_sock(sk); -- sock_put(sk); --} -- --/* This routine computes an IPv4 TCP checksum. */ --void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) --{ -- struct inet_sock *inet = inet_sk(sk); -- struct tcphdr *th = tcp_hdr(skb); -- -- if (skb->ip_summed == CHECKSUM_PARTIAL) { -- th->check = ~tcp_v4_check(len, inet->saddr, -- inet->daddr, 0); -- skb->csum_start = skb_transport_header(skb) - skb->head; -- skb->csum_offset = offsetof(struct tcphdr, check); -- } else { -- th->check = tcp_v4_check(len, inet->saddr, inet->daddr, -- csum_partial((char *)th, -- th->doff << 2, -- skb->csum)); -- } --} -- --int tcp_v4_gso_send_check(struct sk_buff *skb) --{ -- const struct iphdr *iph; -- struct tcphdr *th; -- -- if (!pskb_may_pull(skb, sizeof(*th))) -- return -EINVAL; -- -- iph = ip_hdr(skb); -- th = tcp_hdr(skb); -- -- th->check = 0; -- th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); -- skb->csum_start = skb_transport_header(skb) - skb->head; -- skb->csum_offset = offsetof(struct tcphdr, check); -- skb->ip_summed = CHECKSUM_PARTIAL; -- return 0; --} -- --/* -- * This routine will send an RST to the other tcp. -- * -- * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) -- * for reset. -- * Answer: if a packet caused RST, it is not for a socket -- * existing in our system, if it is matched to a socket, -- * it is just duplicate segment or bug in other side's TCP. -- * So that we build reply only basing on parameters -- * arrived with segment. -- * Exception: precedence violation. We do not implement it in any case. -- */ -- --static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) --{ -- struct tcphdr *th = tcp_hdr(skb); -- struct { -- struct tcphdr th; --#ifdef CONFIG_TCP_MD5SIG -- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; --#endif -- } rep; -- struct ip_reply_arg arg; --#ifdef CONFIG_TCP_MD5SIG -- struct tcp_md5sig_key *key; --#endif -- -- /* Never send a reset in response to a reset. */ -- if (th->rst) -- return; -- -- if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) -- return; -- -- /* Swap the send and the receive. */ -- memset(&rep, 0, sizeof(rep)); -- rep.th.dest = th->source; -- rep.th.source = th->dest; -- rep.th.doff = sizeof(struct tcphdr) / 4; -- rep.th.rst = 1; -- -- if (th->ack) { -- rep.th.seq = th->ack_seq; -- } else { -- rep.th.ack = 1; -- rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + -- skb->len - (th->doff << 2)); -- } -- -- memset(&arg, 0, sizeof(arg)); -- arg.iov[0].iov_base = (unsigned char *)&rep; -- arg.iov[0].iov_len = sizeof(rep.th); -- --#ifdef CONFIG_TCP_MD5SIG -- key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; -- if (key) { -- rep.opt[0] = htonl((TCPOPT_NOP << 24) | -- (TCPOPT_NOP << 16) | -- (TCPOPT_MD5SIG << 8) | -- TCPOLEN_MD5SIG); -- /* Update length and the length the header thinks exists */ -- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; -- rep.th.doff = arg.iov[0].iov_len / 4; -- -- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], -- key, -- ip_hdr(skb)->daddr, -- ip_hdr(skb)->saddr, -- &rep.th, IPPROTO_TCP, -- arg.iov[0].iov_len); -- } --#endif -- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, -- ip_hdr(skb)->saddr, /* XXX */ -- sizeof(struct tcphdr), IPPROTO_TCP, 0); -- arg.csumoffset = offsetof(struct tcphdr, check) / 2; -- -- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); -- -- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); -- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); --} -- --/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states -- outside socket context is ugly, certainly. What can I do? -- */ -- --static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, -- struct sk_buff *skb, u32 seq, u32 ack, -- u32 win, u32 ts) --{ -- struct tcphdr *th = tcp_hdr(skb); -- struct { -- struct tcphdr th; -- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) --#ifdef CONFIG_TCP_MD5SIG -- + (TCPOLEN_MD5SIG_ALIGNED >> 2) --#endif -- ]; -- } rep; -- struct ip_reply_arg arg; --#ifdef CONFIG_TCP_MD5SIG -- struct tcp_md5sig_key *key; -- struct tcp_md5sig_key tw_key; --#endif -- -- memset(&rep.th, 0, sizeof(struct tcphdr)); -- memset(&arg, 0, sizeof(arg)); -- -- arg.iov[0].iov_base = (unsigned char *)&rep; -- arg.iov[0].iov_len = sizeof(rep.th); -- if (ts) { -- rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | -- (TCPOPT_TIMESTAMP << 8) | -- TCPOLEN_TIMESTAMP); -- rep.opt[1] = htonl(tcp_time_stamp); -- rep.opt[2] = htonl(ts); -- arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; -- } -- -- /* Swap the send and the receive. */ -- rep.th.dest = th->source; -- rep.th.source = th->dest; -- rep.th.doff = arg.iov[0].iov_len / 4; -- rep.th.seq = htonl(seq); -- rep.th.ack_seq = htonl(ack); -- rep.th.ack = 1; -- rep.th.window = htons(win); -- --#ifdef CONFIG_TCP_MD5SIG -- /* -- * The SKB holds an imcoming packet, but may not have a valid ->sk -- * pointer. This is especially the case when we're dealing with a -- * TIME_WAIT ack, because the sk structure is long gone, and only -- * the tcp_timewait_sock remains. So the md5 key is stashed in that -- * structure, and we use it in preference. I believe that (twsk || -- * skb->sk) holds true, but we program defensively. -- */ -- if (!twsk && skb->sk) { -- key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); -- } else if (twsk && twsk->tw_md5_keylen) { -- tw_key.key = twsk->tw_md5_key; -- tw_key.keylen = twsk->tw_md5_keylen; -- key = &tw_key; -- } else -- key = NULL; -- -- if (key) { -- int offset = (ts) ? 3 : 0; -- -- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | -- (TCPOPT_NOP << 16) | -- (TCPOPT_MD5SIG << 8) | -- TCPOLEN_MD5SIG); -- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; -- rep.th.doff = arg.iov[0].iov_len/4; -- -- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], -- key, -- ip_hdr(skb)->daddr, -- ip_hdr(skb)->saddr, -- &rep.th, IPPROTO_TCP, -- arg.iov[0].iov_len); -- } --#endif -- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, -- ip_hdr(skb)->saddr, /* XXX */ -- arg.iov[0].iov_len, IPPROTO_TCP, 0); -- arg.csumoffset = offsetof(struct tcphdr, check) / 2; -- if (twsk) -- arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; -- -- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); -- -- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); --} -- --static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) --{ -- struct inet_timewait_sock *tw = inet_twsk(sk); -- struct tcp_timewait_sock *tcptw = tcp_twsk(sk); -- -- tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, -- tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, -- tcptw->tw_ts_recent); -- -- inet_twsk_put(tw); --} -- --static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, -- struct request_sock *req) --{ -- tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, -- tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, -- req->ts_recent); --} -- --/* -- * Send a SYN-ACK after having received an ACK. -- * This still operates on a request_sock only, not on a big -- * socket. -- */ --static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, -- struct dst_entry *dst) --{ -- const struct inet_request_sock *ireq = inet_rsk(req); -- int err = -1; -- struct sk_buff * skb; -- -- /* First, grab a route. */ -- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) -- goto out; -- -- skb = tcp_make_synack(sk, dst, req); -- -- if (skb) { -- struct tcphdr *th = tcp_hdr(skb); -- -- th->check = tcp_v4_check(skb->len, -- ireq->loc_addr, -- ireq->rmt_addr, -- csum_partial((char *)th, skb->len, -- skb->csum)); -- -- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, -- ireq->rmt_addr, -- ireq->opt); -- err = net_xmit_eval(err); -- } -- --out: -- dst_release(dst); -- return err; --} -- --/* -- * IPv4 request_sock destructor. -- */ --static void tcp_v4_reqsk_destructor(struct request_sock *req) --{ -- kfree(inet_rsk(req)->opt); --} -- --#ifdef CONFIG_SYN_COOKIES --static void syn_flood_warning(struct sk_buff *skb) --{ -- static unsigned long warntime; -- -- if (time_after(jiffies, (warntime + HZ * 60))) { -- warntime = jiffies; -- printk(KERN_INFO -- "possible SYN flooding on port %d. Sending cookies.\n", -- ntohs(tcp_hdr(skb)->dest)); -- } --} --#endif -- --/* -- * Save and compile IPv4 options into the request_sock if needed. -- */ --static struct ip_options *tcp_v4_save_options(struct sock *sk, -- struct sk_buff *skb) --{ -- struct ip_options *opt = &(IPCB(skb)->opt); -- struct ip_options *dopt = NULL; -- -- if (opt && opt->optlen) { -- int opt_size = optlength(opt); -- dopt = kmalloc(opt_size, GFP_ATOMIC); -- if (dopt) { -- if (ip_options_echo(dopt, skb)) { -- kfree(dopt); -- dopt = NULL; -- } -- } -- } -- return dopt; --} -- --#ifdef CONFIG_TCP_MD5SIG --/* -- * RFC2385 MD5 checksumming requires a mapping of -- * IP address->MD5 Key. -- * We need to maintain these in the sk structure. -- */ -- --/* Find the Key structure for an address. */ --static struct tcp_md5sig_key * -- tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- int i; -- -- if (!tp->md5sig_info || !tp->md5sig_info->entries4) -- return NULL; -- for (i = 0; i < tp->md5sig_info->entries4; i++) { -- if (tp->md5sig_info->keys4[i].addr == addr) -- return &tp->md5sig_info->keys4[i].base; -- } -- return NULL; --} -- --struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, -- struct sock *addr_sk) --{ -- return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); --} -- --EXPORT_SYMBOL(tcp_v4_md5_lookup); -- --static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, -- struct request_sock *req) --{ -- return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); --} -- --/* This can be called on a newly created socket, from other files */ --int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, -- u8 *newkey, u8 newkeylen) --{ -- /* Add Key to the list */ -- struct tcp4_md5sig_key *key; -- struct tcp_sock *tp = tcp_sk(sk); -- struct tcp4_md5sig_key *keys; -- -- key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr); -- if (key) { -- /* Pre-existing entry - just update that one. */ -- kfree(key->base.key); -- key->base.key = newkey; -- key->base.keylen = newkeylen; -- } else { -- struct tcp_md5sig_info *md5sig; -- -- if (!tp->md5sig_info) { -- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), -- GFP_ATOMIC); -- if (!tp->md5sig_info) { -- kfree(newkey); -- return -ENOMEM; -- } -- sk->sk_route_caps &= ~NETIF_F_GSO_MASK; -- } -- if (tcp_alloc_md5sig_pool() == NULL) { -- kfree(newkey); -- return -ENOMEM; -- } -- md5sig = tp->md5sig_info; -- -- if (md5sig->alloced4 == md5sig->entries4) { -- keys = kmalloc((sizeof(*keys) * -- (md5sig->entries4 + 1)), GFP_ATOMIC); -- if (!keys) { -- kfree(newkey); -- tcp_free_md5sig_pool(); -- return -ENOMEM; -- } -- -- if (md5sig->entries4) -- memcpy(keys, md5sig->keys4, -- sizeof(*keys) * md5sig->entries4); -- -- /* Free old key list, and reference new one */ -- if (md5sig->keys4) -- kfree(md5sig->keys4); -- md5sig->keys4 = keys; -- md5sig->alloced4++; -- } -- md5sig->entries4++; -- md5sig->keys4[md5sig->entries4 - 1].addr = addr; -- md5sig->keys4[md5sig->entries4 - 1].base.key = newkey; -- md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen; -- } -- return 0; --} -- --EXPORT_SYMBOL(tcp_v4_md5_do_add); -- --static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, -- u8 *newkey, u8 newkeylen) --{ -- return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, -- newkey, newkeylen); --} -- --int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- int i; -- -- for (i = 0; i < tp->md5sig_info->entries4; i++) { -- if (tp->md5sig_info->keys4[i].addr == addr) { -- /* Free the key */ -- kfree(tp->md5sig_info->keys4[i].base.key); -- tp->md5sig_info->entries4--; -- -- if (tp->md5sig_info->entries4 == 0) { -- kfree(tp->md5sig_info->keys4); -- tp->md5sig_info->keys4 = NULL; -- tp->md5sig_info->alloced4 = 0; -- } else if (tp->md5sig_info->entries4 != i) { -- /* Need to do some manipulation */ -- memcpy(&tp->md5sig_info->keys4[i], -- &tp->md5sig_info->keys4[i+1], -- (tp->md5sig_info->entries4 - i) * -- sizeof(struct tcp4_md5sig_key)); -- } -- tcp_free_md5sig_pool(); -- return 0; -- } -- } -- return -ENOENT; --} -- --EXPORT_SYMBOL(tcp_v4_md5_do_del); -- --static void tcp_v4_clear_md5_list(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- -- /* Free each key, then the set of key keys, -- * the crypto element, and then decrement our -- * hold on the last resort crypto. -- */ -- if (tp->md5sig_info->entries4) { -- int i; -- for (i = 0; i < tp->md5sig_info->entries4; i++) -- kfree(tp->md5sig_info->keys4[i].base.key); -- tp->md5sig_info->entries4 = 0; -- tcp_free_md5sig_pool(); -- } -- if (tp->md5sig_info->keys4) { -- kfree(tp->md5sig_info->keys4); -- tp->md5sig_info->keys4 = NULL; -- tp->md5sig_info->alloced4 = 0; -- } --} -- --static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, -- int optlen) --{ -- struct tcp_md5sig cmd; -- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; -- u8 *newkey; -- -- if (optlen < sizeof(cmd)) -- return -EINVAL; -- -- if (copy_from_user(&cmd, optval, sizeof(cmd))) -- return -EFAULT; -- -- if (sin->sin_family != AF_INET) -- return -EINVAL; -- -- if (!cmd.tcpm_key || !cmd.tcpm_keylen) { -- if (!tcp_sk(sk)->md5sig_info) -- return -ENOENT; -- return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); -- } -- -- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) -- return -EINVAL; -- -- if (!tcp_sk(sk)->md5sig_info) { -- struct tcp_sock *tp = tcp_sk(sk); -- struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); -- -- if (!p) -- return -EINVAL; -- -- tp->md5sig_info = p; -- sk->sk_route_caps &= ~NETIF_F_GSO_MASK; -- } -- -- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); -- if (!newkey) -- return -ENOMEM; -- return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, -- newkey, cmd.tcpm_keylen); --} -- --static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, -- __be32 saddr, __be32 daddr, -- struct tcphdr *th, int protocol, -- int tcplen) --{ -- struct scatterlist sg[4]; -- __u16 data_len; -- int block = 0; -- __sum16 old_checksum; -- struct tcp_md5sig_pool *hp; -- struct tcp4_pseudohdr *bp; -- struct hash_desc *desc; -- int err; -- unsigned int nbytes = 0; -- -- /* -- * Okay, so RFC2385 is turned on for this connection, -- * so we need to generate the MD5 hash for the packet now. -- */ -- -- hp = tcp_get_md5sig_pool(); -- if (!hp) -- goto clear_hash_noput; -- -- bp = &hp->md5_blk.ip4; -- desc = &hp->md5_desc; -- -- /* -- * 1. the TCP pseudo-header (in the order: source IP address, -- * destination IP address, zero-padded protocol number, and -- * segment length) -- */ -- bp->saddr = saddr; -- bp->daddr = daddr; -- bp->pad = 0; -- bp->protocol = protocol; -- bp->len = htons(tcplen); -- sg_set_buf(&sg[block++], bp, sizeof(*bp)); -- nbytes += sizeof(*bp); -- -- /* 2. the TCP header, excluding options, and assuming a -- * checksum of zero/ -- */ -- old_checksum = th->check; -- th->check = 0; -- sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); -- nbytes += sizeof(struct tcphdr); -- -- /* 3. the TCP segment data (if any) */ -- data_len = tcplen - (th->doff << 2); -- if (data_len > 0) { -- unsigned char *data = (unsigned char *)th + (th->doff << 2); -- sg_set_buf(&sg[block++], data, data_len); -- nbytes += data_len; -- } -- -- /* 4. an independently-specified key or password, known to both -- * TCPs and presumably connection-specific -- */ -- sg_set_buf(&sg[block++], key->key, key->keylen); -- nbytes += key->keylen; -- -- /* Now store the Hash into the packet */ -- err = crypto_hash_init(desc); -- if (err) -- goto clear_hash; -- err = crypto_hash_update(desc, sg, nbytes); -- if (err) -- goto clear_hash; -- err = crypto_hash_final(desc, md5_hash); -- if (err) -- goto clear_hash; -- -- /* Reset header, and free up the crypto */ -- tcp_put_md5sig_pool(); -- th->check = old_checksum; -- --out: -- return 0; --clear_hash: -- tcp_put_md5sig_pool(); --clear_hash_noput: -- memset(md5_hash, 0, 16); -- goto out; --} -- --int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, -- struct sock *sk, -- struct dst_entry *dst, -- struct request_sock *req, -- struct tcphdr *th, int protocol, -- int tcplen) --{ -- __be32 saddr, daddr; -- -- if (sk) { -- saddr = inet_sk(sk)->saddr; -- daddr = inet_sk(sk)->daddr; -- } else { -- struct rtable *rt = (struct rtable *)dst; -- BUG_ON(!rt); -- saddr = rt->rt_src; -- daddr = rt->rt_dst; -- } -- return tcp_v4_do_calc_md5_hash(md5_hash, key, -- saddr, daddr, -- th, protocol, tcplen); --} -- --EXPORT_SYMBOL(tcp_v4_calc_md5_hash); -- --static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) --{ -- /* -- * This gets called for each TCP segment that arrives -- * so we want to be efficient. -- * We have 3 drop cases: -- * o No MD5 hash and one expected. -- * o MD5 hash and we're not expecting one. -- * o MD5 hash and its wrong. -- */ -- __u8 *hash_location = NULL; -- struct tcp_md5sig_key *hash_expected; -- const struct iphdr *iph = ip_hdr(skb); -- struct tcphdr *th = tcp_hdr(skb); -- int length = (th->doff << 2) - sizeof(struct tcphdr); -- int genhash; -- unsigned char *ptr; -- unsigned char newhash[16]; -- -- hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); -- -- /* -- * If the TCP option length is less than the TCP_MD5SIG -- * option length, then we can shortcut -- */ -- if (length < TCPOLEN_MD5SIG) { -- if (hash_expected) -- return 1; -- else -- return 0; -- } -- -- /* Okay, we can't shortcut - we have to grub through the options */ -- ptr = (unsigned char *)(th + 1); -- while (length > 0) { -- int opcode = *ptr++; -- int opsize; -- -- switch (opcode) { -- case TCPOPT_EOL: -- goto done_opts; -- case TCPOPT_NOP: -- length--; -- continue; -- default: -- opsize = *ptr++; -- if (opsize < 2) -- goto done_opts; -- if (opsize > length) -- goto done_opts; -- -- if (opcode == TCPOPT_MD5SIG) { -- hash_location = ptr; -- goto done_opts; -- } -- } -- ptr += opsize-2; -- length -= opsize; -- } --done_opts: -- /* We've parsed the options - do we have a hash? */ -- if (!hash_expected && !hash_location) -- return 0; -- -- if (hash_expected && !hash_location) { -- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " -- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", -- NIPQUAD(iph->saddr), ntohs(th->source), -- NIPQUAD(iph->daddr), ntohs(th->dest)); -- return 1; -- } -- -- if (!hash_expected && hash_location) { -- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " -- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", -- NIPQUAD(iph->saddr), ntohs(th->source), -- NIPQUAD(iph->daddr), ntohs(th->dest)); -- return 1; -- } -- -- /* Okay, so this is hash_expected and hash_location - -- * so we need to calculate the checksum. -- */ -- genhash = tcp_v4_do_calc_md5_hash(newhash, -- hash_expected, -- iph->saddr, iph->daddr, -- th, sk->sk_protocol, -- skb->len); -- -- if (genhash || memcmp(hash_location, newhash, 16) != 0) { -- if (net_ratelimit()) { -- printk(KERN_INFO "MD5 Hash failed for " -- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", -- NIPQUAD(iph->saddr), ntohs(th->source), -- NIPQUAD(iph->daddr), ntohs(th->dest), -- genhash ? " tcp_v4_calc_md5_hash failed" : ""); -- } -- return 1; -- } -- return 0; --} -- --#endif -- --struct request_sock_ops tcp_request_sock_ops __read_mostly = { -- .family = PF_INET, -- .obj_size = sizeof(struct tcp_request_sock), -- .rtx_syn_ack = tcp_v4_send_synack, -- .send_ack = tcp_v4_reqsk_send_ack, -- .destructor = tcp_v4_reqsk_destructor, -- .send_reset = tcp_v4_send_reset, --}; -- --#ifdef CONFIG_TCP_MD5SIG --static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { -- .md5_lookup = tcp_v4_reqsk_md5_lookup, --}; --#endif -- --static struct timewait_sock_ops tcp_timewait_sock_ops = { -- .twsk_obj_size = sizeof(struct tcp_timewait_sock), -- .twsk_unique = tcp_twsk_unique, -- .twsk_destructor= tcp_twsk_destructor, --}; -- --int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) --{ -- struct inet_request_sock *ireq; -- struct tcp_options_received tmp_opt; -- struct request_sock *req; -- __be32 saddr = ip_hdr(skb)->saddr; -- __be32 daddr = ip_hdr(skb)->daddr; -- __u32 isn = TCP_SKB_CB(skb)->when; -- struct dst_entry *dst = NULL; --#ifdef CONFIG_SYN_COOKIES -- int want_cookie = 0; --#else --#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ --#endif -- -- /* Never answer to SYNs send to broadcast or multicast */ -- if (((struct rtable *)skb->dst)->rt_flags & -- (RTCF_BROADCAST | RTCF_MULTICAST)) -- goto drop; -- -- /* TW buckets are converted to open requests without -- * limitations, they conserve resources and peer is -- * evidently real one. -- */ -- if (inet_csk_reqsk_queue_is_full(sk) && !isn) { --#ifdef CONFIG_SYN_COOKIES -- if (sysctl_tcp_syncookies) { -- want_cookie = 1; -- } else --#endif -- goto drop; -- } -- -- /* Accept backlog is full. If we have already queued enough -- * of warm entries in syn queue, drop request. It is better than -- * clogging syn queue with openreqs with exponentially increasing -- * timeout. -- */ -- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) -- goto drop; -- -- req = reqsk_alloc(&tcp_request_sock_ops); -- if (!req) -- goto drop; -- --#ifdef CONFIG_TCP_MD5SIG -- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; --#endif -- -- tcp_clear_options(&tmp_opt); -- tmp_opt.mss_clamp = 536; -- tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; -- -- tcp_parse_options(skb, &tmp_opt, 0); -- -- if (want_cookie) { -- tcp_clear_options(&tmp_opt); -- tmp_opt.saw_tstamp = 0; -- } -- -- if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { -- /* Some OSes (unknown ones, but I see them on web server, which -- * contains information interesting only for windows' -- * users) do not send their stamp in SYN. It is easy case. -- * We simply do not advertise TS support. -- */ -- tmp_opt.saw_tstamp = 0; -- tmp_opt.tstamp_ok = 0; -- } -- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; -- -- tcp_openreq_init(req, &tmp_opt, skb); -- -- if (security_inet_conn_request(sk, skb, req)) -- goto drop_and_free; -- -- ireq = inet_rsk(req); -- ireq->loc_addr = daddr; -- ireq->rmt_addr = saddr; -- ireq->opt = tcp_v4_save_options(sk, skb); -- if (!want_cookie) -- TCP_ECN_create_request(req, tcp_hdr(skb)); -- -- if (want_cookie) { --#ifdef CONFIG_SYN_COOKIES -- syn_flood_warning(skb); --#endif -- isn = cookie_v4_init_sequence(sk, skb, &req->mss); -- } else if (!isn) { -- struct inet_peer *peer = NULL; -- -- /* VJ's idea. We save last timestamp seen -- * from the destination in peer table, when entering -- * state TIME-WAIT, and check against it before -- * accepting new connection request. -- * -- * If "isn" is not zero, this request hit alive -- * timewait bucket, so that all the necessary checks -- * are made in the function processing timewait state. -- */ -- if (tmp_opt.saw_tstamp && -- tcp_death_row.sysctl_tw_recycle && -- (dst = inet_csk_route_req(sk, req)) != NULL && -- (peer = rt_get_peer((struct rtable *)dst)) != NULL && -- peer->v4daddr == saddr) { -- if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && -- (s32)(peer->tcp_ts - req->ts_recent) > -- TCP_PAWS_WINDOW) { -- NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); -- dst_release(dst); -- goto drop_and_free; -- } -- } -- /* Kill the following clause, if you dislike this way. */ -- else if (!sysctl_tcp_syncookies && -- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < -- (sysctl_max_syn_backlog >> 2)) && -- (!peer || !peer->tcp_ts_stamp) && -- (!dst || !dst_metric(dst, RTAX_RTT))) { -- /* Without syncookies last quarter of -- * backlog is filled with destinations, -- * proven to be alive. -- * It means that we continue to communicate -- * to destinations, already remembered -- * to the moment of synflood. -- */ -- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " -- "request from %u.%u.%u.%u/%u\n", -- NIPQUAD(saddr), -- ntohs(tcp_hdr(skb)->source)); -- dst_release(dst); -- goto drop_and_free; -- } -- -- isn = tcp_v4_init_sequence(skb); -- } -- tcp_rsk(req)->snt_isn = isn; -- -- if (tcp_v4_send_synack(sk, req, dst)) -- goto drop_and_free; -- -- if (want_cookie) { -- reqsk_free(req); -- } else { -- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); -- } -- return 0; -- --drop_and_free: -- reqsk_free(req); --drop: -- return 0; --} -- -- --/* -- * The three way handshake has completed - we got a valid synack - -- * now create the new socket. -- */ --struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, -- struct request_sock *req, -- struct dst_entry *dst) --{ -- struct inet_request_sock *ireq; -- struct inet_sock *newinet; -- struct tcp_sock *newtp; -- struct sock *newsk; --#ifdef CONFIG_TCP_MD5SIG -- struct tcp_md5sig_key *key; --#endif -- -- if (sk_acceptq_is_full(sk)) -- goto exit_overflow; -- -- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) -- goto exit; -- -- newsk = tcp_create_openreq_child(sk, req, skb); -- if (!newsk) -- goto exit; -- -- newsk->sk_gso_type = SKB_GSO_TCPV4; -- sk_setup_caps(newsk, dst); -- -- newtp = tcp_sk(newsk); -- newinet = inet_sk(newsk); -- ireq = inet_rsk(req); -- newinet->daddr = ireq->rmt_addr; -- newinet->rcv_saddr = ireq->loc_addr; -- newinet->saddr = ireq->loc_addr; -- newinet->opt = ireq->opt; -- ireq->opt = NULL; -- newinet->mc_index = inet_iif(skb); -- newinet->mc_ttl = ip_hdr(skb)->ttl; -- inet_csk(newsk)->icsk_ext_hdr_len = 0; -- if (newinet->opt) -- inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; -- newinet->id = newtp->write_seq ^ jiffies; -- -- tcp_mtup_init(newsk); -- tcp_sync_mss(newsk, dst_mtu(dst)); -- newtp->advmss = dst_metric(dst, RTAX_ADVMSS); -- tcp_initialize_rcv_mss(newsk); -- --#ifdef CONFIG_TCP_MD5SIG -- /* Copy over the MD5 key from the original socket */ -- if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { -- /* -- * We're using one, so create a matching key -- * on the newsk structure. If we fail to get -- * memory, then we end up not copying the key -- * across. Shucks. -- */ -- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); -- if (newkey != NULL) -- tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, -- newkey, key->keylen); -- } --#endif -- -- __inet_hash(&tcp_hashinfo, newsk, 0); -- __inet_inherit_port(&tcp_hashinfo, sk, newsk); -- -- return newsk; -- --exit_overflow: -- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); --exit: -- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); -- dst_release(dst); -- return NULL; --} -- --static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) --{ -- struct tcphdr *th = tcp_hdr(skb); -- const struct iphdr *iph = ip_hdr(skb); -- struct sock *nsk; -- struct request_sock **prev; -- /* Find possible connection requests. */ -- struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, -- iph->saddr, iph->daddr); -- if (req) -- return tcp_check_req(sk, skb, req, prev); -- -- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, -- iph->daddr, th->dest, inet_iif(skb)); -- -- if (nsk) { -- if (nsk->sk_state != TCP_TIME_WAIT) { -- bh_lock_sock(nsk); -- return nsk; -- } -- inet_twsk_put(inet_twsk(nsk)); -- return NULL; -- } -- --#ifdef CONFIG_SYN_COOKIES -- if (!th->rst && !th->syn && th->ack) -- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); --#endif -- return sk; --} -- --static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) --{ -- const struct iphdr *iph = ip_hdr(skb); -- -- if (skb->ip_summed == CHECKSUM_COMPLETE) { -- if (!tcp_v4_check(skb->len, iph->saddr, -- iph->daddr, skb->csum)) { -- skb->ip_summed = CHECKSUM_UNNECESSARY; -- return 0; -- } -- } -- -- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, -- skb->len, IPPROTO_TCP, 0); -- -- if (skb->len <= 76) { -- return __skb_checksum_complete(skb); -- } -- return 0; --} -- -- --/* The socket must have it's spinlock held when we get -- * here. -- * -- * We have a potential double-lock case here, so even when -- * doing backlog processing we use the BH locking scheme. -- * This is because we cannot sleep with the original spinlock -- * held. -- */ --int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) --{ -- struct sock *rsk; --#ifdef CONFIG_TCP_MD5SIG -- /* -- * We really want to reject the packet as early as possible -- * if: -- * o We're expecting an MD5'd packet and this is no MD5 tcp option -- * o There is an MD5 option and we're not expecting one -- */ -- if (tcp_v4_inbound_md5_hash(sk, skb)) -- goto discard; --#endif -- -- if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ -- TCP_CHECK_TIMER(sk); -- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { -- rsk = sk; -- goto reset; -- } -- TCP_CHECK_TIMER(sk); -- return 0; -- } -- -- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) -- goto csum_err; -- -- if (sk->sk_state == TCP_LISTEN) { -- struct sock *nsk = tcp_v4_hnd_req(sk, skb); -- if (!nsk) -- goto discard; -- -- if (nsk != sk) { -- if (tcp_child_process(sk, nsk, skb)) { -- rsk = nsk; -- goto reset; -- } -- return 0; -- } -- } -- -- TCP_CHECK_TIMER(sk); -- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { -- rsk = sk; -- goto reset; -- } -- TCP_CHECK_TIMER(sk); -- return 0; -- --reset: -- tcp_v4_send_reset(rsk, skb); --discard: -- kfree_skb(skb); -- /* Be careful here. If this function gets more complicated and -- * gcc suffers from register pressure on the x86, sk (in %ebx) -- * might be destroyed here. This current version compiles correctly, -- * but you have been warned. -- */ -- return 0; -- --csum_err: -- TCP_INC_STATS_BH(TCP_MIB_INERRS); -- goto discard; --} -- --/* -- * From tcp_input.c -- */ -- --int tcp_v4_rcv(struct sk_buff *skb) --{ -- const struct iphdr *iph; -- struct tcphdr *th; -- struct sock *sk; -- int ret; -- -- if (skb->pkt_type != PACKET_HOST) -- goto discard_it; -- -- /* Count it even if it's bad */ -- TCP_INC_STATS_BH(TCP_MIB_INSEGS); -- -- if (!pskb_may_pull(skb, sizeof(struct tcphdr))) -- goto discard_it; -- -- th = tcp_hdr(skb); -- -- if (th->doff < sizeof(struct tcphdr) / 4) -- goto bad_packet; -- if (!pskb_may_pull(skb, th->doff * 4)) -- goto discard_it; -- -- /* An explanation is required here, I think. -- * Packet length and doff are validated by header prediction, -- * provided case of th->doff==0 is eliminated. -- * So, we defer the checks. */ -- if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) -- goto bad_packet; -- -- th = tcp_hdr(skb); -- iph = ip_hdr(skb); -- TCP_SKB_CB(skb)->seq = ntohl(th->seq); -- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + -- skb->len - th->doff * 4); -- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); -- TCP_SKB_CB(skb)->when = 0; -- TCP_SKB_CB(skb)->flags = iph->tos; -- TCP_SKB_CB(skb)->sacked = 0; -- -- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, -- iph->daddr, th->dest, inet_iif(skb)); -- if (!sk) -- goto no_tcp_socket; -- --process: -- if (sk->sk_state == TCP_TIME_WAIT) -- goto do_time_wait; -- -- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) -- goto discard_and_relse; -- nf_reset(skb); -- -- if (sk_filter(sk, skb)) -- goto discard_and_relse; -- -- skb->dev = NULL; -- -- bh_lock_sock_nested(sk); -- ret = 0; -- if (!sock_owned_by_user(sk)) { --#ifdef CONFIG_NET_DMA -- struct tcp_sock *tp = tcp_sk(sk); -- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) -- tp->ucopy.dma_chan = get_softnet_dma(); -- if (tp->ucopy.dma_chan) -- ret = tcp_v4_do_rcv(sk, skb); -- else --#endif -- { -- if (!tcp_prequeue(sk, skb)) -- ret = tcp_v4_do_rcv(sk, skb); -- } -- } else -- sk_add_backlog(sk, skb); -- bh_unlock_sock(sk); -- -- sock_put(sk); -- -- return ret; -- --no_tcp_socket: -- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) -- goto discard_it; -- -- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { --bad_packet: -- TCP_INC_STATS_BH(TCP_MIB_INERRS); -- } else { -- tcp_v4_send_reset(NULL, skb); -- } -- --discard_it: -- /* Discard frame. */ -- kfree_skb(skb); -- return 0; -- --discard_and_relse: -- sock_put(sk); -- goto discard_it; -- --do_time_wait: -- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { -- inet_twsk_put(inet_twsk(sk)); -- goto discard_it; -- } -- -- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { -- TCP_INC_STATS_BH(TCP_MIB_INERRS); -- inet_twsk_put(inet_twsk(sk)); -- goto discard_it; -- } -- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { -- case TCP_TW_SYN: { -- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, -- iph->daddr, th->dest, -- inet_iif(skb)); -- if (sk2) { -- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); -- inet_twsk_put(inet_twsk(sk)); -- sk = sk2; -- goto process; -- } -- /* Fall through to ACK */ -- } -- case TCP_TW_ACK: -- tcp_v4_timewait_ack(sk, skb); -- break; -- case TCP_TW_RST: -- goto no_tcp_socket; -- case TCP_TW_SUCCESS:; -- } -- goto discard_it; --} -- --/* VJ's idea. Save last timestamp seen from this destination -- * and hold it at least for normal timewait interval to use for duplicate -- * segment detection in subsequent connections, before they enter synchronized -- * state. -- */ -- --int tcp_v4_remember_stamp(struct sock *sk) --{ -- struct inet_sock *inet = inet_sk(sk); -- struct tcp_sock *tp = tcp_sk(sk); -- struct rtable *rt = (struct rtable *)__sk_dst_get(sk); -- struct inet_peer *peer = NULL; -- int release_it = 0; -- -- if (!rt || rt->rt_dst != inet->daddr) { -- peer = inet_getpeer(inet->daddr, 1); -- release_it = 1; -- } else { -- if (!rt->peer) -- rt_bind_peer(rt, 1); -- peer = rt->peer; -- } -- -- if (peer) { -- if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || -- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && -- peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { -- peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; -- peer->tcp_ts = tp->rx_opt.ts_recent; -- } -- if (release_it) -- inet_putpeer(peer); -- return 1; -- } -- -- return 0; --} -- --int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) --{ -- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); -- -- if (peer) { -- const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); -- -- if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || -- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && -- peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { -- peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; -- peer->tcp_ts = tcptw->tw_ts_recent; -- } -- inet_putpeer(peer); -- return 1; -- } -- -- return 0; --} -- --struct inet_connection_sock_af_ops ipv4_specific = { -- .queue_xmit = ip_queue_xmit, -- .send_check = tcp_v4_send_check, -- .rebuild_header = inet_sk_rebuild_header, -- .conn_request = tcp_v4_conn_request, -- .syn_recv_sock = tcp_v4_syn_recv_sock, -- .remember_stamp = tcp_v4_remember_stamp, -- .net_header_len = sizeof(struct iphdr), -- .setsockopt = ip_setsockopt, -- .getsockopt = ip_getsockopt, -- .addr2sockaddr = inet_csk_addr2sockaddr, -- .sockaddr_len = sizeof(struct sockaddr_in), --#ifdef CONFIG_COMPAT -- .compat_setsockopt = compat_ip_setsockopt, -- .compat_getsockopt = compat_ip_getsockopt, --#endif --}; -- --#ifdef CONFIG_TCP_MD5SIG --static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { -- .md5_lookup = tcp_v4_md5_lookup, -- .calc_md5_hash = tcp_v4_calc_md5_hash, -- .md5_add = tcp_v4_md5_add_func, -- .md5_parse = tcp_v4_parse_md5_keys, --}; --#endif -- --/* NOTE: A lot of things set to zero explicitly by call to -- * sk_alloc() so need not be done here. -- */ --static int tcp_v4_init_sock(struct sock *sk) --{ -- struct inet_connection_sock *icsk = inet_csk(sk); -- struct tcp_sock *tp = tcp_sk(sk); -- -- skb_queue_head_init(&tp->out_of_order_queue); -- tcp_init_xmit_timers(sk); -- tcp_prequeue_init(tp); -- -- icsk->icsk_rto = TCP_TIMEOUT_INIT; -- tp->mdev = TCP_TIMEOUT_INIT; -- -- /* So many TCP implementations out there (incorrectly) count the -- * initial SYN frame in their delayed-ACK and congestion control -- * algorithms that we must have the following bandaid to talk -- * efficiently to them. -DaveM -- */ -- tp->snd_cwnd = 2; -- -- /* See draft-stevens-tcpca-spec-01 for discussion of the -- * initialization of these values. -- */ -- tp->snd_ssthresh = 0x7fffffff; /* Infinity */ -- tp->snd_cwnd_clamp = ~0; -- tp->mss_cache = 536; -- -- tp->reordering = sysctl_tcp_reordering; -- icsk->icsk_ca_ops = &tcp_init_congestion_ops; -- -- sk->sk_state = TCP_CLOSE; -- -- sk->sk_write_space = sk_stream_write_space; -- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); -- -- icsk->icsk_af_ops = &ipv4_specific; -- icsk->icsk_sync_mss = tcp_sync_mss; --#ifdef CONFIG_TCP_MD5SIG -- tp->af_specific = &tcp_sock_ipv4_specific; --#endif -- -- sk->sk_sndbuf = sysctl_tcp_wmem[1]; -- sk->sk_rcvbuf = sysctl_tcp_rmem[1]; -- -- atomic_inc(&tcp_sockets_allocated); -- -- return 0; --} -- --int tcp_v4_destroy_sock(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- -- tcp_clear_xmit_timers(sk); -- -- tcp_cleanup_congestion_control(sk); -- -- /* Cleanup up the write buffer. */ -- tcp_write_queue_purge(sk); -- -- /* Cleans up our, hopefully empty, out_of_order_queue. */ -- __skb_queue_purge(&tp->out_of_order_queue); -- --#ifdef CONFIG_TCP_MD5SIG -- /* Clean up the MD5 key list, if any */ -- if (tp->md5sig_info) { -- tcp_v4_clear_md5_list(sk); -- kfree(tp->md5sig_info); -- tp->md5sig_info = NULL; -- } --#endif -- --#ifdef CONFIG_NET_DMA -- /* Cleans up our sk_async_wait_queue */ -- __skb_queue_purge(&sk->sk_async_wait_queue); --#endif -- -- /* Clean prequeue, it must be empty really */ -- __skb_queue_purge(&tp->ucopy.prequeue); -- -- /* Clean up a referenced TCP bind bucket. */ -- if (inet_csk(sk)->icsk_bind_hash) -- inet_put_port(&tcp_hashinfo, sk); -- -- /* -- * If sendmsg cached page exists, toss it. -- */ -- if (sk->sk_sndmsg_page) { -- __free_page(sk->sk_sndmsg_page); -- sk->sk_sndmsg_page = NULL; -- } -- -- atomic_dec(&tcp_sockets_allocated); -- -- return 0; --} -- --EXPORT_SYMBOL(tcp_v4_destroy_sock); -- --#ifdef CONFIG_PROC_FS --/* Proc filesystem TCP sock list dumping. */ -- --static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) --{ -- return hlist_empty(head) ? NULL : -- list_entry(head->first, struct inet_timewait_sock, tw_node); --} -- --static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) --{ -- return tw->tw_node.next ? -- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; --} -- --static void *listening_get_next(struct seq_file *seq, void *cur) --{ -- struct inet_connection_sock *icsk; -- struct hlist_node *node; -- struct sock *sk = cur; -- struct tcp_iter_state* st = seq->private; -- -- if (!sk) { -- st->bucket = 0; -- sk = sk_head(&tcp_hashinfo.listening_hash[0]); -- goto get_sk; -- } -- -- ++st->num; -- -- if (st->state == TCP_SEQ_STATE_OPENREQ) { -- struct request_sock *req = cur; -- -- icsk = inet_csk(st->syn_wait_sk); -- req = req->dl_next; -- while (1) { -- while (req) { -- if (req->rsk_ops->family == st->family) { -- cur = req; -- goto out; -- } -- req = req->dl_next; -- } -- if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) -- break; --get_req: -- req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; -- } -- sk = sk_next(st->syn_wait_sk); -- st->state = TCP_SEQ_STATE_LISTENING; -- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); -- } else { -- icsk = inet_csk(sk); -- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); -- if (reqsk_queue_len(&icsk->icsk_accept_queue)) -- goto start_req; -- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); -- sk = sk_next(sk); -- } --get_sk: -- sk_for_each_from(sk, node) { -- if (sk->sk_family == st->family) { -- cur = sk; -- goto out; -- } -- icsk = inet_csk(sk); -- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); -- if (reqsk_queue_len(&icsk->icsk_accept_queue)) { --start_req: -- st->uid = sock_i_uid(sk); -- st->syn_wait_sk = sk; -- st->state = TCP_SEQ_STATE_OPENREQ; -- st->sbucket = 0; -- goto get_req; -- } -- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); -- } -- if (++st->bucket < INET_LHTABLE_SIZE) { -- sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); -- goto get_sk; -- } -- cur = NULL; --out: -- return cur; --} -- --static void *listening_get_idx(struct seq_file *seq, loff_t *pos) --{ -- void *rc = listening_get_next(seq, NULL); -- -- while (rc && *pos) { -- rc = listening_get_next(seq, rc); -- --*pos; -- } -- return rc; --} -- --static void *established_get_first(struct seq_file *seq) --{ -- struct tcp_iter_state* st = seq->private; -- void *rc = NULL; -- -- for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { -- struct sock *sk; -- struct hlist_node *node; -- struct inet_timewait_sock *tw; -- -- /* We can reschedule _before_ having picked the target: */ -- cond_resched_softirq(); -- -- read_lock(&tcp_hashinfo.ehash[st->bucket].lock); -- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { -- if (sk->sk_family != st->family) { -- continue; -- } -- rc = sk; -- goto out; -- } -- st->state = TCP_SEQ_STATE_TIME_WAIT; -- inet_twsk_for_each(tw, node, -- &tcp_hashinfo.ehash[st->bucket].twchain) { -- if (tw->tw_family != st->family) { -- continue; -- } -- rc = tw; -- goto out; -- } -- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); -- st->state = TCP_SEQ_STATE_ESTABLISHED; -- } --out: -- return rc; --} -- --static void *established_get_next(struct seq_file *seq, void *cur) --{ -- struct sock *sk = cur; -- struct inet_timewait_sock *tw; -- struct hlist_node *node; -- struct tcp_iter_state* st = seq->private; -- -- ++st->num; -- -- if (st->state == TCP_SEQ_STATE_TIME_WAIT) { -- tw = cur; -- tw = tw_next(tw); --get_tw: -- while (tw && tw->tw_family != st->family) { -- tw = tw_next(tw); -- } -- if (tw) { -- cur = tw; -- goto out; -- } -- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); -- st->state = TCP_SEQ_STATE_ESTABLISHED; -- -- /* We can reschedule between buckets: */ -- cond_resched_softirq(); -- -- if (++st->bucket < tcp_hashinfo.ehash_size) { -- read_lock(&tcp_hashinfo.ehash[st->bucket].lock); -- sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); -- } else { -- cur = NULL; -- goto out; -- } -- } else -- sk = sk_next(sk); -- -- sk_for_each_from(sk, node) { -- if (sk->sk_family == st->family) -- goto found; -- } -- -- st->state = TCP_SEQ_STATE_TIME_WAIT; -- tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); -- goto get_tw; --found: -- cur = sk; --out: -- return cur; --} -- --static void *established_get_idx(struct seq_file *seq, loff_t pos) --{ -- void *rc = established_get_first(seq); -- -- while (rc && pos) { -- rc = established_get_next(seq, rc); -- --pos; -- } -- return rc; --} -- --static void *tcp_get_idx(struct seq_file *seq, loff_t pos) --{ -- void *rc; -- struct tcp_iter_state* st = seq->private; -- -- inet_listen_lock(&tcp_hashinfo); -- st->state = TCP_SEQ_STATE_LISTENING; -- rc = listening_get_idx(seq, &pos); -- -- if (!rc) { -- inet_listen_unlock(&tcp_hashinfo); -- local_bh_disable(); -- st->state = TCP_SEQ_STATE_ESTABLISHED; -- rc = established_get_idx(seq, pos); -- } -- -- return rc; --} -- --static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) --{ -- struct tcp_iter_state* st = seq->private; -- st->state = TCP_SEQ_STATE_LISTENING; -- st->num = 0; -- return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; --} -- --static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) --{ -- void *rc = NULL; -- struct tcp_iter_state* st; -- -- if (v == SEQ_START_TOKEN) { -- rc = tcp_get_idx(seq, 0); -- goto out; -- } -- st = seq->private; -- -- switch (st->state) { -- case TCP_SEQ_STATE_OPENREQ: -- case TCP_SEQ_STATE_LISTENING: -- rc = listening_get_next(seq, v); -- if (!rc) { -- inet_listen_unlock(&tcp_hashinfo); -- local_bh_disable(); -- st->state = TCP_SEQ_STATE_ESTABLISHED; -- rc = established_get_first(seq); -- } -- break; -- case TCP_SEQ_STATE_ESTABLISHED: -- case TCP_SEQ_STATE_TIME_WAIT: -- rc = established_get_next(seq, v); -- break; -- } --out: -- ++*pos; -- return rc; --} -- --static void tcp_seq_stop(struct seq_file *seq, void *v) --{ -- struct tcp_iter_state* st = seq->private; -- -- switch (st->state) { -- case TCP_SEQ_STATE_OPENREQ: -- if (v) { -- struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); -- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); -- } -- case TCP_SEQ_STATE_LISTENING: -- if (v != SEQ_START_TOKEN) -- inet_listen_unlock(&tcp_hashinfo); -- break; -- case TCP_SEQ_STATE_TIME_WAIT: -- case TCP_SEQ_STATE_ESTABLISHED: -- if (v) -- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); -- local_bh_enable(); -- break; -- } --} -- --static int tcp_seq_open(struct inode *inode, struct file *file) --{ -- struct tcp_seq_afinfo *afinfo = PDE(inode)->data; -- struct seq_file *seq; -- struct tcp_iter_state *s; -- int rc; -- -- if (unlikely(afinfo == NULL)) -- return -EINVAL; -- -- s = kzalloc(sizeof(*s), GFP_KERNEL); -- if (!s) -- return -ENOMEM; -- s->family = afinfo->family; -- s->seq_ops.start = tcp_seq_start; -- s->seq_ops.next = tcp_seq_next; -- s->seq_ops.show = afinfo->seq_show; -- s->seq_ops.stop = tcp_seq_stop; -- -- rc = seq_open(file, &s->seq_ops); -- if (rc) -- goto out_kfree; -- seq = file->private_data; -- seq->private = s; --out: -- return rc; --out_kfree: -- kfree(s); -- goto out; --} -- --int tcp_proc_register(struct tcp_seq_afinfo *afinfo) --{ -- int rc = 0; -- struct proc_dir_entry *p; -- -- if (!afinfo) -- return -EINVAL; -- afinfo->seq_fops->owner = afinfo->owner; -- afinfo->seq_fops->open = tcp_seq_open; -- afinfo->seq_fops->read = seq_read; -- afinfo->seq_fops->llseek = seq_lseek; -- afinfo->seq_fops->release = seq_release_private; -- -- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); -- if (p) -- p->data = afinfo; -- else -- rc = -ENOMEM; -- return rc; --} -- --void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) --{ -- if (!afinfo) -- return; -- proc_net_remove(afinfo->name); -- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); --} -- --static void get_openreq4(struct sock *sk, struct request_sock *req, -- char *tmpbuf, int i, int uid) --{ -- const struct inet_request_sock *ireq = inet_rsk(req); -- int ttd = req->expires - jiffies; -- -- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" -- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", -- i, -- ireq->loc_addr, -- ntohs(inet_sk(sk)->sport), -- ireq->rmt_addr, -- ntohs(ireq->rmt_port), -- TCP_SYN_RECV, -- 0, 0, /* could print option size, but that is af dependent. */ -- 1, /* timers active (only the expire timer) */ -- jiffies_to_clock_t(ttd), -- req->retrans, -- uid, -- 0, /* non standard timer */ -- 0, /* open_requests have no inode */ -- atomic_read(&sk->sk_refcnt), -- req); --} -- --static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i) --{ -- int timer_active; -- unsigned long timer_expires; -- struct tcp_sock *tp = tcp_sk(sk); -- const struct inet_connection_sock *icsk = inet_csk(sk); -- struct inet_sock *inet = inet_sk(sk); -- __be32 dest = inet->daddr; -- __be32 src = inet->rcv_saddr; -- __u16 destp = ntohs(inet->dport); -- __u16 srcp = ntohs(inet->sport); -- -- if (icsk->icsk_pending == ICSK_TIME_RETRANS) { -- timer_active = 1; -- timer_expires = icsk->icsk_timeout; -- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { -- timer_active = 4; -- timer_expires = icsk->icsk_timeout; -- } else if (timer_pending(&sk->sk_timer)) { -- timer_active = 2; -- timer_expires = sk->sk_timer.expires; -- } else { -- timer_active = 0; -- timer_expires = jiffies; -- } -- -- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " -- "%08X %5d %8d %lu %d %p %u %u %u %u %d", -- i, src, srcp, dest, destp, sk->sk_state, -- tp->write_seq - tp->snd_una, -- sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : -- (tp->rcv_nxt - tp->copied_seq), -- timer_active, -- jiffies_to_clock_t(timer_expires - jiffies), -- icsk->icsk_retransmits, -- sock_i_uid(sk), -- icsk->icsk_probes_out, -- sock_i_ino(sk), -- atomic_read(&sk->sk_refcnt), sk, -- icsk->icsk_rto, -- icsk->icsk_ack.ato, -- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, -- tp->snd_cwnd, -- tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); --} -- --static void get_timewait4_sock(struct inet_timewait_sock *tw, -- char *tmpbuf, int i) --{ -- __be32 dest, src; -- __u16 destp, srcp; -- int ttd = tw->tw_ttd - jiffies; -- -- if (ttd < 0) -- ttd = 0; -- -- dest = tw->tw_daddr; -- src = tw->tw_rcv_saddr; -- destp = ntohs(tw->tw_dport); -- srcp = ntohs(tw->tw_sport); -- -- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" -- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p", -- i, src, srcp, dest, destp, tw->tw_substate, 0, 0, -- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, -- atomic_read(&tw->tw_refcnt), tw); --} -- --#define TMPSZ 150 -- --static int tcp4_seq_show(struct seq_file *seq, void *v) --{ -- struct tcp_iter_state* st; -- char tmpbuf[TMPSZ + 1]; -- -- if (v == SEQ_START_TOKEN) { -- seq_printf(seq, "%-*s\n", TMPSZ - 1, -- " sl local_address rem_address st tx_queue " -- "rx_queue tr tm->when retrnsmt uid timeout " -- "inode"); -- goto out; -- } -- st = seq->private; -- -- switch (st->state) { -- case TCP_SEQ_STATE_LISTENING: -- case TCP_SEQ_STATE_ESTABLISHED: -- get_tcp4_sock(v, tmpbuf, st->num); -- break; -- case TCP_SEQ_STATE_OPENREQ: -- get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid); -- break; -- case TCP_SEQ_STATE_TIME_WAIT: -- get_timewait4_sock(v, tmpbuf, st->num); -- break; -- } -- seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf); --out: -- return 0; --} -- --static struct file_operations tcp4_seq_fops; --static struct tcp_seq_afinfo tcp4_seq_afinfo = { -- .owner = THIS_MODULE, -- .name = "tcp", -- .family = AF_INET, -- .seq_show = tcp4_seq_show, -- .seq_fops = &tcp4_seq_fops, --}; -- --int __init tcp4_proc_init(void) --{ -- return tcp_proc_register(&tcp4_seq_afinfo); --} -- --void tcp4_proc_exit(void) --{ -- tcp_proc_unregister(&tcp4_seq_afinfo); --} --#endif /* CONFIG_PROC_FS */ -- --struct proto tcp_prot = { -- .name = "TCP", -- .owner = THIS_MODULE, -- .close = tcp_close, -- .connect = tcp_v4_connect, -- .disconnect = tcp_disconnect, -- .accept = inet_csk_accept, -- .ioctl = tcp_ioctl, -- .init = tcp_v4_init_sock, -- .destroy = tcp_v4_destroy_sock, -- .shutdown = tcp_shutdown, -- .setsockopt = tcp_setsockopt, -- .getsockopt = tcp_getsockopt, -- .recvmsg = tcp_recvmsg, -- .backlog_rcv = tcp_v4_do_rcv, -- .hash = tcp_v4_hash, -- .unhash = tcp_unhash, -- .get_port = tcp_v4_get_port, -- .enter_memory_pressure = tcp_enter_memory_pressure, -- .sockets_allocated = &tcp_sockets_allocated, -- .orphan_count = &tcp_orphan_count, -- .memory_allocated = &tcp_memory_allocated, -- .memory_pressure = &tcp_memory_pressure, -- .sysctl_mem = sysctl_tcp_mem, -- .sysctl_wmem = sysctl_tcp_wmem, -- .sysctl_rmem = sysctl_tcp_rmem, -- .max_header = MAX_TCP_HEADER, -- .obj_size = sizeof(struct tcp_sock), -- .twsk_prot = &tcp_timewait_sock_ops, -- .rsk_prot = &tcp_request_sock_ops, --#ifdef CONFIG_COMPAT -- .compat_setsockopt = compat_tcp_setsockopt, -- .compat_getsockopt = compat_tcp_getsockopt, --#endif --}; -- --void __init tcp_v4_init(struct net_proto_family *ops) --{ -- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, -- IPPROTO_TCP) < 0) -- panic("Failed to create the TCP control socket.\n"); --} -- --EXPORT_SYMBOL(ipv4_specific); --EXPORT_SYMBOL(tcp_hashinfo); --EXPORT_SYMBOL(tcp_prot); --EXPORT_SYMBOL(tcp_unhash); --EXPORT_SYMBOL(tcp_v4_conn_request); --EXPORT_SYMBOL(tcp_v4_connect); --EXPORT_SYMBOL(tcp_v4_do_rcv); --EXPORT_SYMBOL(tcp_v4_remember_stamp); --EXPORT_SYMBOL(tcp_v4_send_check); --EXPORT_SYMBOL(tcp_v4_syn_recv_sock); -- --#ifdef CONFIG_PROC_FS --EXPORT_SYMBOL(tcp_proc_register); --EXPORT_SYMBOL(tcp_proc_unregister); --#endif --EXPORT_SYMBOL(sysctl_local_port_range); --EXPORT_SYMBOL(sysctl_tcp_low_latency); -- diff -Nurb linux-2.6.22-570/net/ipv4/tcp_output.c linux-2.6.22-590/net/ipv4/tcp_output.c --- linux-2.6.22-570/net/ipv4/tcp_output.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-590/net/ipv4/tcp_output.c 2008-03-20 13:28:03.000000000 -0400 @@ -186556,4601 +179801,296 @@ diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c linux-2.6.22-590/net/ipv6/addrco } - for_each_netdev(dev) { -+ for_each_netdev(&init_net, dev) { - struct in_device * in_dev = __in_dev_get_rtnl(dev); - if (in_dev && (dev->flags & IFF_UP)) { - struct in_ifaddr * ifa; -@@ -2245,12 +2246,12 @@ - - /* first try to inherit the link-local address from the link device */ - if (idev->dev->iflink && -- (link_dev = __dev_get_by_index(idev->dev->iflink))) { -+ (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { - if (!ipv6_inherit_linklocal(idev, link_dev)) - return; - } - /* then try to inherit it from any device */ -- for_each_netdev(link_dev) { -+ for_each_netdev(&init_net, link_dev) { - if (!ipv6_inherit_linklocal(idev, link_dev)) - return; - } -@@ -2282,6 +2283,9 @@ - struct inet6_dev *idev = __in6_dev_get(dev); - int run_pending = 0; - -+ if (dev->nd_net != &init_net) -+ return NOTIFY_DONE; -+ - switch(event) { - case NETDEV_REGISTER: - if (!idev && dev->mtu >= IPV6_MIN_MTU) { -@@ -2419,7 +2423,7 @@ - - ASSERT_RTNL(); - -- if (dev == &loopback_dev && how == 1) -+ if (dev == &init_net.loopback_dev && how == 1) - how = 0; - - rt6_ifdown(dev); -@@ -2850,18 +2854,18 @@ - - int __init if6_proc_init(void) - { -- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) -+ if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) - return -ENOMEM; - return 0; - } - - void if6_proc_exit(void) - { -- proc_net_remove("if_inet6"); -+ proc_net_remove(&init_net, "if_inet6"); - } - #endif /* CONFIG_PROC_FS */ - --#ifdef CONFIG_IPV6_MIP6 -+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - /* Check if address is a home address configured on any interface. */ - int ipv6_chk_home_addr(struct in6_addr *addr) - { -@@ -3017,11 +3021,15 @@ - static int - inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) - { -+ struct net *net = skb->sk->sk_net; - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; - int err; - -+ if (net != &init_net) -+ return -EINVAL; -+ - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - return err; -@@ -3074,6 +3082,7 @@ - static int - inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) - { -+ struct net *net = skb->sk->sk_net; - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; -@@ -3083,6 +3092,9 @@ - u8 ifa_flags; - int err; - -+ if (net != &init_net) -+ return -EINVAL; -+ - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - return err; -@@ -3103,7 +3115,7 @@ - valid_lft = INFINITY_LIFE_TIME; - } - -- dev = __dev_get_by_index(ifm->ifa_index); -+ dev = __dev_get_by_index(&init_net, ifm->ifa_index); - if (dev == NULL) - return -ENODEV; - -@@ -3292,7 +3304,7 @@ - s_ip_idx = ip_idx = cb->args[1]; - - idx = 0; -- for_each_netdev(dev) { -+ for_each_netdev(&init_net, dev) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) -@@ -3367,26 +3379,42 @@ - - static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) - { -+ struct net *net = skb->sk->sk_net; - enum addr_type_t type = UNICAST_ADDR; -+ -+ if (net != &init_net) -+ return 0; -+ - return inet6_dump_addr(skb, cb, type); - } - - static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) - { -+ struct net *net = skb->sk->sk_net; - enum addr_type_t type = MULTICAST_ADDR; -+ -+ if (net != &init_net) -+ return 0; -+ - return inet6_dump_addr(skb, cb, type); - } - - - static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) - { -+ struct net *net = skb->sk->sk_net; - enum addr_type_t type = ANYCAST_ADDR; -+ -+ if (net != &init_net) -+ return 0; -+ - return inet6_dump_addr(skb, cb, type); - } - - static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) - { -+ struct net *net = in_skb->sk->sk_net; - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *addr = NULL; -@@ -3395,6 +3423,9 @@ - struct sk_buff *skb; - int err; - -+ if (net != &init_net) -+ return -EINVAL; -+ - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - goto errout; -@@ -3407,7 +3438,7 @@ - - ifm = nlmsg_data(nlh); - if (ifm->ifa_index) -- dev = __dev_get_by_index(ifm->ifa_index); -+ dev = __dev_get_by_index(&init_net, ifm->ifa_index); - - if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { - err = -EADDRNOTAVAIL; -@@ -3427,7 +3458,7 @@ - kfree_skb(skb); - goto errout_ifa; - } -- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -+ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); - errout_ifa: - in6_ifa_put(ifa); - errout: -@@ -3450,10 +3481,10 @@ - kfree_skb(skb); - goto errout; - } -- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); -+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); - errout: - if (err < 0) -- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); -+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); - } - - static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, -@@ -3612,19 +3643,22 @@ - - static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) - { -+ struct net *net = skb->sk->sk_net; - int idx, err; - int s_idx = cb->args[0]; - struct net_device *dev; - struct inet6_dev *idev; - struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; - -+ if (net != &init_net) -+ return 0; - /* FIXME: maybe disable ipv6 on non v6 guests? - if (skb->sk && skb->sk->sk_vx_info) - return skb->len; */ - - read_lock(&dev_base_lock); - idx = 0; -- for_each_netdev(dev) { -+ for_each_netdev(&init_net, dev) { - if (idx < s_idx) - goto cont; - if (!v6_dev_in_nx_info(dev, nxi)) -@@ -3661,10 +3695,10 @@ - kfree_skb(skb); - goto errout; - } -- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); -+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); - errout: - if (err < 0) -- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); -+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); - } - - static inline size_t inet6_prefix_nlmsg_size(void) -@@ -3730,10 +3764,10 @@ - kfree_skb(skb); - goto errout; - } -- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); -+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); - errout: - if (err < 0) -- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); -+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); - } - - static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) -@@ -4244,16 +4278,16 @@ - * device and it being up should be removed. - */ - rtnl_lock(); -- if (!ipv6_add_dev(&loopback_dev)) -+ if (!ipv6_add_dev(&init_net.loopback_dev)) - err = -ENOMEM; - rtnl_unlock(); - if (err) - return err; - -- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); -+ ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); - #ifdef CONFIG_IPV6_MULTIPLE_TABLES -- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); -- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); -+ ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); -+ ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); - #endif - - register_netdevice_notifier(&ipv6_dev_notf); -@@ -4304,12 +4338,12 @@ - * clean dev list. - */ - -- for_each_netdev(dev) { -+ for_each_netdev(&init_net, dev) { - if ((idev = __in6_dev_get(dev)) == NULL) - continue; - addrconf_ifdown(dev, 1); - } -- addrconf_ifdown(&loopback_dev, 2); -+ addrconf_ifdown(&init_net.loopback_dev, 2); - - /* - * Check hash table. -@@ -4335,6 +4369,6 @@ - rtnl_unlock(); - - #ifdef CONFIG_PROC_FS -- proc_net_remove("if_inet6"); -+ proc_net_remove(&init_net, "if_inet6"); - #endif - } -diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c.orig linux-2.6.22-590/net/ipv6/addrconf.c.orig ---- linux-2.6.22-570/net/ipv6/addrconf.c.orig 2008-03-20 13:25:40.000000000 -0400 -+++ linux-2.6.22-590/net/ipv6/addrconf.c.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,4301 +0,0 @@ --/* -- * IPv6 Address [auto]configuration -- * Linux INET6 implementation -- * -- * Authors: -- * Pedro Roque -- * Alexey Kuznetsov -- * -- * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $ -- * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public License -- * as published by the Free Software Foundation; either version -- * 2 of the License, or (at your option) any later version. -- */ -- --/* -- * Changes: -- * -- * Janos Farkas : delete timer on ifdown -- * -- * Andi Kleen : kill double kfree on module -- * unload. -- * Maciej W. Rozycki : FDDI support -- * sekiya@USAGI : Don't send too many RS -- * packets. -- * yoshfuji@USAGI : Fixed interval between DAD -- * packets. -- * YOSHIFUJI Hideaki @USAGI : improved accuracy of -- * address validation timer. -- * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041) -- * support. -- * Yuji SEKIYA @USAGI : Don't assign a same IPv6 -- * address on a same interface. -- * YOSHIFUJI Hideaki @USAGI : ARCnet support -- * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to -- * seq_file. -- * YOSHIFUJI Hideaki @USAGI : improved source address -- * selection; consider scope, -- * status etc. -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#ifdef CONFIG_SYSCTL --#include --#endif --#include --#include --#include --#include -- --#include --#include -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#ifdef CONFIG_IPV6_PRIVACY --#include --#endif -- --#include --#include -- --#include --#include -- --/* Set to 3 to get tracing... */ --#define ACONF_DEBUG 2 -- --#if ACONF_DEBUG >= 3 --#define ADBG(x) printk x --#else --#define ADBG(x) --#endif -- --#define INFINITY_LIFE_TIME 0xFFFFFFFF --#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) -- --#ifdef CONFIG_SYSCTL --static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); --static void addrconf_sysctl_unregister(struct ipv6_devconf *p); --#endif -- --#ifdef CONFIG_IPV6_PRIVACY --static int __ipv6_regen_rndid(struct inet6_dev *idev); --static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); --static void ipv6_regen_rndid(unsigned long data); -- --static int desync_factor = MAX_DESYNC_FACTOR * HZ; --#endif -- --static int ipv6_count_addresses(struct inet6_dev *idev); -- --/* -- * Configured unicast address hash table -- */ --static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; --static DEFINE_RWLOCK(addrconf_hash_lock); -- --static void addrconf_verify(unsigned long); -- --static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); --static DEFINE_SPINLOCK(addrconf_verify_lock); -- --static void addrconf_join_anycast(struct inet6_ifaddr *ifp); --static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); -- --static int addrconf_ifdown(struct net_device *dev, int how); -- --static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); --static void addrconf_dad_timer(unsigned long data); --static void addrconf_dad_completed(struct inet6_ifaddr *ifp); --static void addrconf_dad_run(struct inet6_dev *idev); --static void addrconf_rs_timer(unsigned long data); --static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); --static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); -- --static void inet6_prefix_notify(int event, struct inet6_dev *idev, -- struct prefix_info *pinfo); --static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); -- --static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -- --struct ipv6_devconf ipv6_devconf __read_mostly = { -- .forwarding = 0, -- .hop_limit = IPV6_DEFAULT_HOPLIMIT, -- .mtu6 = IPV6_MIN_MTU, -- .accept_ra = 1, -- .accept_redirects = 1, -- .autoconf = 1, -- .force_mld_version = 0, -- .dad_transmits = 1, -- .rtr_solicits = MAX_RTR_SOLICITATIONS, -- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, -- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, --#ifdef CONFIG_IPV6_PRIVACY -- .use_tempaddr = 0, -- .temp_valid_lft = TEMP_VALID_LIFETIME, -- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, -- .regen_max_retry = REGEN_MAX_RETRY, -- .max_desync_factor = MAX_DESYNC_FACTOR, --#endif -- .max_addresses = IPV6_MAX_ADDRESSES, -- .accept_ra_defrtr = 1, -- .accept_ra_pinfo = 1, --#ifdef CONFIG_IPV6_ROUTER_PREF -- .accept_ra_rtr_pref = 1, -- .rtr_probe_interval = 60 * HZ, --#ifdef CONFIG_IPV6_ROUTE_INFO -- .accept_ra_rt_info_max_plen = 0, --#endif --#endif -- .proxy_ndp = 0, -- .accept_source_route = 0, /* we do not accept RH0 by default. */ --}; -- --static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { -- .forwarding = 0, -- .hop_limit = IPV6_DEFAULT_HOPLIMIT, -- .mtu6 = IPV6_MIN_MTU, -- .accept_ra = 1, -- .accept_redirects = 1, -- .autoconf = 1, -- .dad_transmits = 1, -- .rtr_solicits = MAX_RTR_SOLICITATIONS, -- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, -- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, --#ifdef CONFIG_IPV6_PRIVACY -- .use_tempaddr = 0, -- .temp_valid_lft = TEMP_VALID_LIFETIME, -- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, -- .regen_max_retry = REGEN_MAX_RETRY, -- .max_desync_factor = MAX_DESYNC_FACTOR, --#endif -- .max_addresses = IPV6_MAX_ADDRESSES, -- .accept_ra_defrtr = 1, -- .accept_ra_pinfo = 1, --#ifdef CONFIG_IPV6_ROUTER_PREF -- .accept_ra_rtr_pref = 1, -- .rtr_probe_interval = 60 * HZ, --#ifdef CONFIG_IPV6_ROUTE_INFO -- .accept_ra_rt_info_max_plen = 0, --#endif --#endif -- .proxy_ndp = 0, -- .accept_source_route = 0, /* we do not accept RH0 by default. */ --}; -- --/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ --const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; --const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; -- --/* Check if a valid qdisc is available */ --static inline int addrconf_qdisc_ok(struct net_device *dev) --{ -- return (dev->qdisc != &noop_qdisc); --} -- --static void addrconf_del_timer(struct inet6_ifaddr *ifp) --{ -- if (del_timer(&ifp->timer)) -- __in6_ifa_put(ifp); --} -- --enum addrconf_timer_t --{ -- AC_NONE, -- AC_DAD, -- AC_RS, --}; -- --static void addrconf_mod_timer(struct inet6_ifaddr *ifp, -- enum addrconf_timer_t what, -- unsigned long when) --{ -- if (!del_timer(&ifp->timer)) -- in6_ifa_hold(ifp); -- -- switch (what) { -- case AC_DAD: -- ifp->timer.function = addrconf_dad_timer; -- break; -- case AC_RS: -- ifp->timer.function = addrconf_rs_timer; -- break; -- default:; -- } -- ifp->timer.expires = jiffies + when; -- add_timer(&ifp->timer); --} -- --static int snmp6_alloc_dev(struct inet6_dev *idev) --{ -- int err = -ENOMEM; -- -- if (!idev || !idev->dev) -- return -EINVAL; -- -- if (snmp_mib_init((void **)idev->stats.ipv6, -- sizeof(struct ipstats_mib), -- __alignof__(struct ipstats_mib)) < 0) -- goto err_ip; -- if (snmp_mib_init((void **)idev->stats.icmpv6, -- sizeof(struct icmpv6_mib), -- __alignof__(struct icmpv6_mib)) < 0) -- goto err_icmp; -- -- return 0; -- --err_icmp: -- snmp_mib_free((void **)idev->stats.ipv6); --err_ip: -- return err; --} -- --static int snmp6_free_dev(struct inet6_dev *idev) --{ -- snmp_mib_free((void **)idev->stats.icmpv6); -- snmp_mib_free((void **)idev->stats.ipv6); -- return 0; --} -- --/* Nobody refers to this device, we may destroy it. */ -- --static void in6_dev_finish_destroy_rcu(struct rcu_head *head) --{ -- struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); -- kfree(idev); --} -- --void in6_dev_finish_destroy(struct inet6_dev *idev) --{ -- struct net_device *dev = idev->dev; -- BUG_TRAP(idev->addr_list==NULL); -- BUG_TRAP(idev->mc_list==NULL); --#ifdef NET_REFCNT_DEBUG -- printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL"); --#endif -- dev_put(dev); -- if (!idev->dead) { -- printk("Freeing alive inet6 device %p\n", idev); -- return; -- } -- snmp6_free_dev(idev); -- call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); --} -- --EXPORT_SYMBOL(in6_dev_finish_destroy); -- --static struct inet6_dev * ipv6_add_dev(struct net_device *dev) --{ -- struct inet6_dev *ndev; -- struct in6_addr maddr; -- -- ASSERT_RTNL(); -- -- if (dev->mtu < IPV6_MIN_MTU) -- return NULL; -- -- ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); -- -- if (ndev == NULL) -- return NULL; -- -- rwlock_init(&ndev->lock); -- ndev->dev = dev; -- memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); -- ndev->cnf.mtu6 = dev->mtu; -- ndev->cnf.sysctl = NULL; -- ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); -- if (ndev->nd_parms == NULL) { -- kfree(ndev); -- return NULL; -- } -- /* We refer to the device */ -- dev_hold(dev); -- -- if (snmp6_alloc_dev(ndev) < 0) { -- ADBG((KERN_WARNING -- "%s(): cannot allocate memory for statistics; dev=%s.\n", -- __FUNCTION__, dev->name)); -- neigh_parms_release(&nd_tbl, ndev->nd_parms); -- ndev->dead = 1; -- in6_dev_finish_destroy(ndev); -- return NULL; -- } -- -- if (snmp6_register_dev(ndev) < 0) { -- ADBG((KERN_WARNING -- "%s(): cannot create /proc/net/dev_snmp6/%s\n", -- __FUNCTION__, dev->name)); -- neigh_parms_release(&nd_tbl, ndev->nd_parms); -- ndev->dead = 1; -- in6_dev_finish_destroy(ndev); -- return NULL; -- } -- -- /* One reference from device. We must do this before -- * we invoke __ipv6_regen_rndid(). -- */ -- in6_dev_hold(ndev); -- --#ifdef CONFIG_IPV6_PRIVACY -- init_timer(&ndev->regen_timer); -- ndev->regen_timer.function = ipv6_regen_rndid; -- ndev->regen_timer.data = (unsigned long) ndev; -- if ((dev->flags&IFF_LOOPBACK) || -- dev->type == ARPHRD_TUNNEL || --#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) -- dev->type == ARPHRD_SIT || --#endif -- dev->type == ARPHRD_NONE) { -- printk(KERN_INFO -- "%s: Disabled Privacy Extensions\n", -- dev->name); -- ndev->cnf.use_tempaddr = -1; -- } else { -- in6_dev_hold(ndev); -- ipv6_regen_rndid((unsigned long) ndev); -- } --#endif -- -- if (netif_running(dev) && addrconf_qdisc_ok(dev)) -- ndev->if_flags |= IF_READY; -- -- ipv6_mc_init_dev(ndev); -- ndev->tstamp = jiffies; --#ifdef CONFIG_SYSCTL -- neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, -- NET_IPV6_NEIGH, "ipv6", -- &ndisc_ifinfo_sysctl_change, -- NULL); -- addrconf_sysctl_register(ndev, &ndev->cnf); --#endif -- /* protected by rtnl_lock */ -- rcu_assign_pointer(dev->ip6_ptr, ndev); -- -- /* Join all-node multicast group */ -- ipv6_addr_all_nodes(&maddr); -- ipv6_dev_mc_inc(dev, &maddr); -- -- return ndev; --} -- --static struct inet6_dev * ipv6_find_idev(struct net_device *dev) --{ -- struct inet6_dev *idev; -- -- ASSERT_RTNL(); -- -- if ((idev = __in6_dev_get(dev)) == NULL) { -- if ((idev = ipv6_add_dev(dev)) == NULL) -- return NULL; -- } -- -- if (dev->flags&IFF_UP) -- ipv6_mc_up(idev); -- return idev; --} -- --#ifdef CONFIG_SYSCTL --static void dev_forward_change(struct inet6_dev *idev) --{ -- struct net_device *dev; -- struct inet6_ifaddr *ifa; -- struct in6_addr addr; -- -- if (!idev) -- return; -- dev = idev->dev; -- if (dev && (dev->flags & IFF_MULTICAST)) { -- ipv6_addr_all_routers(&addr); -- -- if (idev->cnf.forwarding) -- ipv6_dev_mc_inc(dev, &addr); -- else -- ipv6_dev_mc_dec(dev, &addr); -- } -- for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { -- if (ifa->flags&IFA_F_TENTATIVE) -- continue; -- if (idev->cnf.forwarding) -- addrconf_join_anycast(ifa); -- else -- addrconf_leave_anycast(ifa); -- } --} -- -- --static void addrconf_forward_change(void) --{ -- struct net_device *dev; -- struct inet6_dev *idev; -- -- read_lock(&dev_base_lock); -- for_each_netdev(dev) { -- rcu_read_lock(); -- idev = __in6_dev_get(dev); -- if (idev) { -- int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); -- idev->cnf.forwarding = ipv6_devconf.forwarding; -- if (changed) -- dev_forward_change(idev); -- } -- rcu_read_unlock(); -- } -- read_unlock(&dev_base_lock); --} --#endif -- --/* Nobody refers to this ifaddr, destroy it */ -- --void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) --{ -- BUG_TRAP(ifp->if_next==NULL); -- BUG_TRAP(ifp->lst_next==NULL); --#ifdef NET_REFCNT_DEBUG -- printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); --#endif -- -- in6_dev_put(ifp->idev); -- -- if (del_timer(&ifp->timer)) -- printk("Timer is still running, when freeing ifa=%p\n", ifp); -- -- if (!ifp->dead) { -- printk("Freeing alive inet6 address %p\n", ifp); -- return; -- } -- dst_release(&ifp->rt->u.dst); -- -- kfree(ifp); --} -- --static void --ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) --{ -- struct inet6_ifaddr *ifa, **ifap; -- int ifp_scope = ipv6_addr_src_scope(&ifp->addr); -- -- /* -- * Each device address list is sorted in order of scope - -- * global before linklocal. -- */ -- for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; -- ifap = &ifa->if_next) { -- if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) -- break; -- } -- -- ifp->if_next = *ifap; -- *ifap = ifp; --} -- --/* On success it returns ifp with increased reference count */ -- --static struct inet6_ifaddr * --ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, -- int scope, u32 flags) --{ -- struct inet6_ifaddr *ifa = NULL; -- struct rt6_info *rt; -- int hash; -- int err = 0; -- -- rcu_read_lock_bh(); -- if (idev->dead) { -- err = -ENODEV; /*XXX*/ -- goto out2; -- } -- -- write_lock(&addrconf_hash_lock); -- -- /* Ignore adding duplicate addresses on an interface */ -- if (ipv6_chk_same_addr(addr, idev->dev)) { -- ADBG(("ipv6_add_addr: already assigned\n")); -- err = -EEXIST; -- goto out; -- } -- -- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); -- -- if (ifa == NULL) { -- ADBG(("ipv6_add_addr: malloc failed\n")); -- err = -ENOBUFS; -- goto out; -- } -- -- rt = addrconf_dst_alloc(idev, addr, 0); -- if (IS_ERR(rt)) { -- err = PTR_ERR(rt); -- goto out; -- } -- -- ipv6_addr_copy(&ifa->addr, addr); -- -- spin_lock_init(&ifa->lock); -- init_timer(&ifa->timer); -- ifa->timer.data = (unsigned long) ifa; -- ifa->scope = scope; -- ifa->prefix_len = pfxlen; -- ifa->flags = flags | IFA_F_TENTATIVE; -- ifa->cstamp = ifa->tstamp = jiffies; -- -- ifa->rt = rt; -- -- /* -- * part one of RFC 4429, section 3.3 -- * We should not configure an address as -- * optimistic if we do not yet know the link -- * layer address of our nexhop router -- */ -- -- if (rt->rt6i_nexthop == NULL) -- ifa->flags &= ~IFA_F_OPTIMISTIC; -- -- ifa->idev = idev; -- in6_dev_hold(idev); -- /* For caller */ -- in6_ifa_hold(ifa); -- -- /* Add to big hash table */ -- hash = ipv6_addr_hash(addr); -- -- ifa->lst_next = inet6_addr_lst[hash]; -- inet6_addr_lst[hash] = ifa; -- in6_ifa_hold(ifa); -- write_unlock(&addrconf_hash_lock); -- -- write_lock(&idev->lock); -- /* Add to inet6_dev unicast addr list. */ -- ipv6_link_dev_addr(idev, ifa); -- --#ifdef CONFIG_IPV6_PRIVACY -- if (ifa->flags&IFA_F_TEMPORARY) { -- ifa->tmp_next = idev->tempaddr_list; -- idev->tempaddr_list = ifa; -- in6_ifa_hold(ifa); -- } --#endif -- -- in6_ifa_hold(ifa); -- write_unlock(&idev->lock); --out2: -- rcu_read_unlock_bh(); -- -- if (likely(err == 0)) -- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); -- else { -- kfree(ifa); -- ifa = ERR_PTR(err); -- } -- -- return ifa; --out: -- write_unlock(&addrconf_hash_lock); -- goto out2; --} -- --/* This function wants to get referenced ifp and releases it before return */ -- --static void ipv6_del_addr(struct inet6_ifaddr *ifp) --{ -- struct inet6_ifaddr *ifa, **ifap; -- struct inet6_dev *idev = ifp->idev; -- int hash; -- int deleted = 0, onlink = 0; -- unsigned long expires = jiffies; -- -- hash = ipv6_addr_hash(&ifp->addr); -- -- ifp->dead = 1; -- -- write_lock_bh(&addrconf_hash_lock); -- for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; -- ifap = &ifa->lst_next) { -- if (ifa == ifp) { -- *ifap = ifa->lst_next; -- __in6_ifa_put(ifp); -- ifa->lst_next = NULL; -- break; -- } -- } -- write_unlock_bh(&addrconf_hash_lock); -- -- write_lock_bh(&idev->lock); --#ifdef CONFIG_IPV6_PRIVACY -- if (ifp->flags&IFA_F_TEMPORARY) { -- for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; -- ifap = &ifa->tmp_next) { -- if (ifa == ifp) { -- *ifap = ifa->tmp_next; -- if (ifp->ifpub) { -- in6_ifa_put(ifp->ifpub); -- ifp->ifpub = NULL; -- } -- __in6_ifa_put(ifp); -- ifa->tmp_next = NULL; -- break; -- } -- } -- } --#endif -- -- for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { -- if (ifa == ifp) { -- *ifap = ifa->if_next; -- __in6_ifa_put(ifp); -- ifa->if_next = NULL; -- if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) -- break; -- deleted = 1; -- continue; -- } else if (ifp->flags & IFA_F_PERMANENT) { -- if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, -- ifp->prefix_len)) { -- if (ifa->flags & IFA_F_PERMANENT) { -- onlink = 1; -- if (deleted) -- break; -- } else { -- unsigned long lifetime; -- -- if (!onlink) -- onlink = -1; -- -- spin_lock(&ifa->lock); -- lifetime = min_t(unsigned long, -- ifa->valid_lft, 0x7fffffffUL/HZ); -- if (time_before(expires, -- ifa->tstamp + lifetime * HZ)) -- expires = ifa->tstamp + lifetime * HZ; -- spin_unlock(&ifa->lock); -- } -- } -- } -- ifap = &ifa->if_next; -- } -- write_unlock_bh(&idev->lock); -- -- ipv6_ifa_notify(RTM_DELADDR, ifp); -- -- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); -- -- addrconf_del_timer(ifp); -- -- /* -- * Purge or update corresponding prefix -- * -- * 1) we don't purge prefix here if address was not permanent. -- * prefix is managed by its own lifetime. -- * 2) if there're no addresses, delete prefix. -- * 3) if there're still other permanent address(es), -- * corresponding prefix is still permanent. -- * 4) otherwise, update prefix lifetime to the -- * longest valid lifetime among the corresponding -- * addresses on the device. -- * Note: subsequent RA will update lifetime. -- * -- * --yoshfuji -- */ -- if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { -- struct in6_addr prefix; -- struct rt6_info *rt; -- -- ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); -- rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1); -- -- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { -- if (onlink == 0) { -- ip6_del_rt(rt); -- rt = NULL; -- } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { -- rt->rt6i_expires = expires; -- rt->rt6i_flags |= RTF_EXPIRES; -- } -- } -- dst_release(&rt->u.dst); -- } -- -- in6_ifa_put(ifp); --} -- --#ifdef CONFIG_IPV6_PRIVACY --static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) --{ -- struct inet6_dev *idev = ifp->idev; -- struct in6_addr addr, *tmpaddr; -- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp; -- int tmp_plen; -- int ret = 0; -- int max_addresses; -- u32 addr_flags; -- -- write_lock(&idev->lock); -- if (ift) { -- spin_lock_bh(&ift->lock); -- memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); -- spin_unlock_bh(&ift->lock); -- tmpaddr = &addr; -- } else { -- tmpaddr = NULL; -- } --retry: -- in6_dev_hold(idev); -- if (idev->cnf.use_tempaddr <= 0) { -- write_unlock(&idev->lock); -- printk(KERN_INFO -- "ipv6_create_tempaddr(): use_tempaddr is disabled.\n"); -- in6_dev_put(idev); -- ret = -1; -- goto out; -- } -- spin_lock_bh(&ifp->lock); -- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { -- idev->cnf.use_tempaddr = -1; /*XXX*/ -- spin_unlock_bh(&ifp->lock); -- write_unlock(&idev->lock); -- printk(KERN_WARNING -- "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n"); -- in6_dev_put(idev); -- ret = -1; -- goto out; -- } -- in6_ifa_hold(ifp); -- memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); -- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { -- spin_unlock_bh(&ifp->lock); -- write_unlock(&idev->lock); -- printk(KERN_WARNING -- "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n"); -- in6_ifa_put(ifp); -- in6_dev_put(idev); -- ret = -1; -- goto out; -- } -- memcpy(&addr.s6_addr[8], idev->rndid, 8); -- tmp_valid_lft = min_t(__u32, -- ifp->valid_lft, -- idev->cnf.temp_valid_lft); -- tmp_prefered_lft = min_t(__u32, -- ifp->prefered_lft, -- idev->cnf.temp_prefered_lft - desync_factor / HZ); -- tmp_plen = ifp->prefix_len; -- max_addresses = idev->cnf.max_addresses; -- tmp_cstamp = ifp->cstamp; -- tmp_tstamp = ifp->tstamp; -- spin_unlock_bh(&ifp->lock); -- -- write_unlock(&idev->lock); -- -- addr_flags = IFA_F_TEMPORARY; -- /* set in addrconf_prefix_rcv() */ -- if (ifp->flags & IFA_F_OPTIMISTIC) -- addr_flags |= IFA_F_OPTIMISTIC; -- -- ift = !max_addresses || -- ipv6_count_addresses(idev) < max_addresses ? -- ipv6_add_addr(idev, &addr, tmp_plen, -- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, -- addr_flags) : NULL; -- if (!ift || IS_ERR(ift)) { -- in6_ifa_put(ifp); -- in6_dev_put(idev); -- printk(KERN_INFO -- "ipv6_create_tempaddr(): retry temporary address regeneration.\n"); -- tmpaddr = &addr; -- write_lock(&idev->lock); -- goto retry; -- } -- -- spin_lock_bh(&ift->lock); -- ift->ifpub = ifp; -- ift->valid_lft = tmp_valid_lft; -- ift->prefered_lft = tmp_prefered_lft; -- ift->cstamp = tmp_cstamp; -- ift->tstamp = tmp_tstamp; -- spin_unlock_bh(&ift->lock); -- -- addrconf_dad_start(ift, 0); -- in6_ifa_put(ift); -- in6_dev_put(idev); --out: -- return ret; --} --#endif -- --/* -- * Choose an appropriate source address (RFC3484) -- */ --struct ipv6_saddr_score { -- int addr_type; -- unsigned int attrs; -- int matchlen; -- int scope; -- unsigned int rule; --}; -- --#define IPV6_SADDR_SCORE_LOCAL 0x0001 --#define IPV6_SADDR_SCORE_PREFERRED 0x0004 --#define IPV6_SADDR_SCORE_HOA 0x0008 --#define IPV6_SADDR_SCORE_OIF 0x0010 --#define IPV6_SADDR_SCORE_LABEL 0x0020 --#define IPV6_SADDR_SCORE_PRIVACY 0x0040 -- --static inline int ipv6_saddr_preferred(int type) --{ -- if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| -- IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) -- return 1; -- return 0; --} -- --/* static matching label */ --static inline int ipv6_saddr_label(const struct in6_addr *addr, int type) --{ -- /* -- * prefix (longest match) label -- * ----------------------------- -- * ::1/128 0 -- * ::/0 1 -- * 2002::/16 2 -- * ::/96 3 -- * ::ffff:0:0/96 4 -- * fc00::/7 5 -- * 2001::/32 6 -- */ -- if (type & IPV6_ADDR_LOOPBACK) -- return 0; -- else if (type & IPV6_ADDR_COMPATv4) -- return 3; -- else if (type & IPV6_ADDR_MAPPED) -- return 4; -- else if (addr->s6_addr32[0] == htonl(0x20010000)) -- return 6; -- else if (addr->s6_addr16[0] == htons(0x2002)) -- return 2; -- else if ((addr->s6_addr[0] & 0xfe) == 0xfc) -- return 5; -- return 1; --} -- --int ipv6_dev_get_saddr(struct net_device *daddr_dev, -- struct in6_addr *daddr, struct in6_addr *saddr) --{ -- struct ipv6_saddr_score hiscore; -- struct inet6_ifaddr *ifa_result = NULL; -- int daddr_type = __ipv6_addr_type(daddr); -- int daddr_scope = __ipv6_addr_src_scope(daddr_type); -- u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); -- struct net_device *dev; -- -- memset(&hiscore, 0, sizeof(hiscore)); -- -- read_lock(&dev_base_lock); -- rcu_read_lock(); -- -- for_each_netdev(dev) { -- struct inet6_dev *idev; -- struct inet6_ifaddr *ifa; -- -- /* Rule 0: Candidate Source Address (section 4) -- * - multicast and link-local destination address, -- * the set of candidate source address MUST only -- * include addresses assigned to interfaces -- * belonging to the same link as the outgoing -- * interface. -- * (- For site-local destination addresses, the -- * set of candidate source addresses MUST only -- * include addresses assigned to interfaces -- * belonging to the same site as the outgoing -- * interface.) -- */ -- if ((daddr_type & IPV6_ADDR_MULTICAST || -- daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && -- daddr_dev && dev != daddr_dev) -- continue; -- -- idev = __in6_dev_get(dev); -- if (!idev) -- continue; -- -- read_lock_bh(&idev->lock); -- for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { -- struct ipv6_saddr_score score; -- -- score.addr_type = __ipv6_addr_type(&ifa->addr); -- -- /* Rule 0: -- * - Tentative Address (RFC2462 section 5.4) -- * - A tentative address is not considered -- * "assigned to an interface" in the traditional -- * sense, unless it is also flagged as optimistic. -- * - Candidate Source Address (section 4) -- * - In any case, anycast addresses, multicast -- * addresses, and the unspecified address MUST -- * NOT be included in a candidate set. -- */ -- if ((ifa->flags & IFA_F_TENTATIVE) && -- (!(ifa->flags & IFA_F_OPTIMISTIC))) -- continue; -- if (unlikely(score.addr_type == IPV6_ADDR_ANY || -- score.addr_type & IPV6_ADDR_MULTICAST)) { -- LIMIT_NETDEBUG(KERN_DEBUG -- "ADDRCONF: unspecified / multicast address" -- "assigned as unicast address on %s", -- dev->name); -- continue; -- } -- -- score.attrs = 0; -- score.matchlen = 0; -- score.scope = 0; -- score.rule = 0; -- -- if (ifa_result == NULL) { -- /* record it if the first available entry */ -- goto record_it; -- } -- -- /* Rule 1: Prefer same address */ -- if (hiscore.rule < 1) { -- if (ipv6_addr_equal(&ifa_result->addr, daddr)) -- hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; -- hiscore.rule++; -- } -- if (ipv6_addr_equal(&ifa->addr, daddr)) { -- score.attrs |= IPV6_SADDR_SCORE_LOCAL; -- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { -- score.rule = 1; -- goto record_it; -- } -- } else { -- if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) -- continue; -- } -- -- /* Rule 2: Prefer appropriate scope */ -- if (hiscore.rule < 2) { -- hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); -- hiscore.rule++; -- } -- score.scope = __ipv6_addr_src_scope(score.addr_type); -- if (hiscore.scope < score.scope) { -- if (hiscore.scope < daddr_scope) { -- score.rule = 2; -- goto record_it; -- } else -- continue; -- } else if (score.scope < hiscore.scope) { -- if (score.scope < daddr_scope) -- break; /* addresses sorted by scope */ -- else { -- score.rule = 2; -- goto record_it; -- } -- } -- -- /* Rule 3: Avoid deprecated and optimistic addresses */ -- if (hiscore.rule < 3) { -- if (ipv6_saddr_preferred(hiscore.addr_type) || -- (((ifa_result->flags & -- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) -- hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; -- hiscore.rule++; -- } -- if (ipv6_saddr_preferred(score.addr_type) || -- (((ifa->flags & -- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { -- score.attrs |= IPV6_SADDR_SCORE_PREFERRED; -- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { -- score.rule = 3; -- goto record_it; -- } -- } else { -- if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) -- continue; -- } -- -- /* Rule 4: Prefer home address */ --#ifdef CONFIG_IPV6_MIP6 -- if (hiscore.rule < 4) { -- if (ifa_result->flags & IFA_F_HOMEADDRESS) -- hiscore.attrs |= IPV6_SADDR_SCORE_HOA; -- hiscore.rule++; -- } -- if (ifa->flags & IFA_F_HOMEADDRESS) { -- score.attrs |= IPV6_SADDR_SCORE_HOA; -- if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { -- score.rule = 4; -- goto record_it; -- } -- } else { -- if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) -- continue; -- } --#else -- if (hiscore.rule < 4) -- hiscore.rule++; --#endif -- -- /* Rule 5: Prefer outgoing interface */ -- if (hiscore.rule < 5) { -- if (daddr_dev == NULL || -- daddr_dev == ifa_result->idev->dev) -- hiscore.attrs |= IPV6_SADDR_SCORE_OIF; -- hiscore.rule++; -- } -- if (daddr_dev == NULL || -- daddr_dev == ifa->idev->dev) { -- score.attrs |= IPV6_SADDR_SCORE_OIF; -- if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { -- score.rule = 5; -- goto record_it; -- } -- } else { -- if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) -- continue; -- } -- -- /* Rule 6: Prefer matching label */ -- if (hiscore.rule < 6) { -- if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) -- hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; -- hiscore.rule++; -- } -- if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { -- score.attrs |= IPV6_SADDR_SCORE_LABEL; -- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { -- score.rule = 6; -- goto record_it; -- } -- } else { -- if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) -- continue; -- } -- --#ifdef CONFIG_IPV6_PRIVACY -- /* Rule 7: Prefer public address -- * Note: prefer temprary address if use_tempaddr >= 2 -- */ -- if (hiscore.rule < 7) { -- if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ -- (ifa_result->idev->cnf.use_tempaddr >= 2)) -- hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; -- hiscore.rule++; -- } -- if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ -- (ifa->idev->cnf.use_tempaddr >= 2)) { -- score.attrs |= IPV6_SADDR_SCORE_PRIVACY; -- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { -- score.rule = 7; -- goto record_it; -- } -- } else { -- if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) -- continue; -- } --#else -- if (hiscore.rule < 7) -- hiscore.rule++; --#endif -- /* Rule 8: Use longest matching prefix */ -- if (hiscore.rule < 8) { -- hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); -- hiscore.rule++; -- } -- score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); -- if (score.matchlen > hiscore.matchlen) { -- score.rule = 8; -- goto record_it; -- } --#if 0 -- else if (score.matchlen < hiscore.matchlen) -- continue; --#endif -- -- /* Final Rule: choose first available one */ -- continue; --record_it: -- if (ifa_result) -- in6_ifa_put(ifa_result); -- in6_ifa_hold(ifa); -- ifa_result = ifa; -- hiscore = score; -- } -- read_unlock_bh(&idev->lock); -- } -- rcu_read_unlock(); -- read_unlock(&dev_base_lock); -- -- if (!ifa_result) -- return -EADDRNOTAVAIL; -- -- ipv6_addr_copy(saddr, &ifa_result->addr); -- in6_ifa_put(ifa_result); -- return 0; --} -- -- --int ipv6_get_saddr(struct dst_entry *dst, -- struct in6_addr *daddr, struct in6_addr *saddr) --{ -- return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr); --} -- --EXPORT_SYMBOL(ipv6_get_saddr); -- --int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, -- unsigned char banned_flags) --{ -- struct inet6_dev *idev; -- int err = -EADDRNOTAVAIL; -- -- rcu_read_lock(); -- if ((idev = __in6_dev_get(dev)) != NULL) { -- struct inet6_ifaddr *ifp; -- -- read_lock_bh(&idev->lock); -- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { -- if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { -- ipv6_addr_copy(addr, &ifp->addr); -- err = 0; -- break; -- } -- } -- read_unlock_bh(&idev->lock); -- } -- rcu_read_unlock(); -- return err; --} -- --static int ipv6_count_addresses(struct inet6_dev *idev) --{ -- int cnt = 0; -- struct inet6_ifaddr *ifp; -- -- read_lock_bh(&idev->lock); -- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) -- cnt++; -- read_unlock_bh(&idev->lock); -- return cnt; --} -- --int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) --{ -- struct inet6_ifaddr * ifp; -- u8 hash = ipv6_addr_hash(addr); -- -- read_lock_bh(&addrconf_hash_lock); -- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { -- if (ipv6_addr_equal(&ifp->addr, addr) && -- !(ifp->flags&IFA_F_TENTATIVE)) { -- if (dev == NULL || ifp->idev->dev == dev || -- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) -- break; -- } -- } -- read_unlock_bh(&addrconf_hash_lock); -- return ifp != NULL; --} -- --EXPORT_SYMBOL(ipv6_chk_addr); -- --static --int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) --{ -- struct inet6_ifaddr * ifp; -- u8 hash = ipv6_addr_hash(addr); -- -- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { -- if (ipv6_addr_equal(&ifp->addr, addr)) { -- if (dev == NULL || ifp->idev->dev == dev) -- break; -- } -- } -- return ifp != NULL; --} -- --struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict) --{ -- struct inet6_ifaddr * ifp; -- u8 hash = ipv6_addr_hash(addr); -- -- read_lock_bh(&addrconf_hash_lock); -- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { -- if (ipv6_addr_equal(&ifp->addr, addr)) { -- if (dev == NULL || ifp->idev->dev == dev || -- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { -- in6_ifa_hold(ifp); -- break; -- } -- } -- } -- read_unlock_bh(&addrconf_hash_lock); -- -- return ifp; --} -- --int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) --{ -- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; -- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); -- __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; -- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); -- int sk_ipv6only = ipv6_only_sock(sk); -- int sk2_ipv6only = inet_v6_ipv6only(sk2); -- int addr_type = ipv6_addr_type(sk_rcv_saddr6); -- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; -- -- if (!sk2_rcv_saddr && !sk_ipv6only) -- return 1; -- -- if (addr_type2 == IPV6_ADDR_ANY && -- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) -- return 1; -- -- if (addr_type == IPV6_ADDR_ANY && -- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) -- return 1; -- -- if (sk2_rcv_saddr6 && -- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) -- return 1; -- -- if (addr_type == IPV6_ADDR_MAPPED && -- !sk2_ipv6only && -- (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr)) -- return 1; -- -- return 0; --} -- --/* Gets referenced address, destroys ifaddr */ -- --static void addrconf_dad_stop(struct inet6_ifaddr *ifp) --{ -- if (ifp->flags&IFA_F_PERMANENT) { -- spin_lock_bh(&ifp->lock); -- addrconf_del_timer(ifp); -- ifp->flags |= IFA_F_TENTATIVE; -- spin_unlock_bh(&ifp->lock); -- in6_ifa_put(ifp); --#ifdef CONFIG_IPV6_PRIVACY -- } else if (ifp->flags&IFA_F_TEMPORARY) { -- struct inet6_ifaddr *ifpub; -- spin_lock_bh(&ifp->lock); -- ifpub = ifp->ifpub; -- if (ifpub) { -- in6_ifa_hold(ifpub); -- spin_unlock_bh(&ifp->lock); -- ipv6_create_tempaddr(ifpub, ifp); -- in6_ifa_put(ifpub); -- } else { -- spin_unlock_bh(&ifp->lock); -- } -- ipv6_del_addr(ifp); --#endif -- } else -- ipv6_del_addr(ifp); --} -- --void addrconf_dad_failure(struct inet6_ifaddr *ifp) --{ -- if (net_ratelimit()) -- printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); -- addrconf_dad_stop(ifp); --} -- --/* Join to solicited addr multicast group. */ -- --void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) --{ -- struct in6_addr maddr; -- -- if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) -- return; -- -- addrconf_addr_solict_mult(addr, &maddr); -- ipv6_dev_mc_inc(dev, &maddr); --} -- --void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) --{ -- struct in6_addr maddr; -- -- if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) -- return; -- -- addrconf_addr_solict_mult(addr, &maddr); -- __ipv6_dev_mc_dec(idev, &maddr); --} -- --static void addrconf_join_anycast(struct inet6_ifaddr *ifp) --{ -- struct in6_addr addr; -- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); -- if (ipv6_addr_any(&addr)) -- return; -- ipv6_dev_ac_inc(ifp->idev->dev, &addr); --} -- --static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) --{ -- struct in6_addr addr; -- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); -- if (ipv6_addr_any(&addr)) -- return; -- __ipv6_dev_ac_dec(ifp->idev, &addr); --} -- --static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) --{ -- if (dev->addr_len != ETH_ALEN) -- return -1; -- memcpy(eui, dev->dev_addr, 3); -- memcpy(eui + 5, dev->dev_addr + 3, 3); -- -- /* -- * The zSeries OSA network cards can be shared among various -- * OS instances, but the OSA cards have only one MAC address. -- * This leads to duplicate address conflicts in conjunction -- * with IPv6 if more than one instance uses the same card. -- * -- * The driver for these cards can deliver a unique 16-bit -- * identifier for each instance sharing the same card. It is -- * placed instead of 0xFFFE in the interface identifier. The -- * "u" bit of the interface identifier is not inverted in this -- * case. Hence the resulting interface identifier has local -- * scope according to RFC2373. -- */ -- if (dev->dev_id) { -- eui[3] = (dev->dev_id >> 8) & 0xFF; -- eui[4] = dev->dev_id & 0xFF; -- } else { -- eui[3] = 0xFF; -- eui[4] = 0xFE; -- eui[0] ^= 2; -- } -- return 0; --} -- --static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) --{ -- /* XXX: inherit EUI-64 from other interface -- yoshfuji */ -- if (dev->addr_len != ARCNET_ALEN) -- return -1; -- memset(eui, 0, 7); -- eui[7] = *(u8*)dev->dev_addr; -- return 0; --} -- --static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev) --{ -- if (dev->addr_len != INFINIBAND_ALEN) -- return -1; -- memcpy(eui, dev->dev_addr + 12, 8); -- eui[0] |= 2; -- return 0; --} -- --static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) --{ -- switch (dev->type) { -- case ARPHRD_ETHER: -- case ARPHRD_FDDI: -- case ARPHRD_IEEE802_TR: -- return addrconf_ifid_eui48(eui, dev); -- case ARPHRD_ARCNET: -- return addrconf_ifid_arcnet(eui, dev); -- case ARPHRD_INFINIBAND: -- return addrconf_ifid_infiniband(eui, dev); -- } -- return -1; --} -- --static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) --{ -- int err = -1; -- struct inet6_ifaddr *ifp; -- -- read_lock_bh(&idev->lock); -- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { -- if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { -- memcpy(eui, ifp->addr.s6_addr+8, 8); -- err = 0; -- break; -- } -- } -- read_unlock_bh(&idev->lock); -- return err; --} -- --#ifdef CONFIG_IPV6_PRIVACY --/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ --static int __ipv6_regen_rndid(struct inet6_dev *idev) --{ --regen: -- get_random_bytes(idev->rndid, sizeof(idev->rndid)); -- idev->rndid[0] &= ~0x02; -- -- /* -- * : -- * check if generated address is not inappropriate -- * -- * - Reserved subnet anycast (RFC 2526) -- * 11111101 11....11 1xxxxxxx -- * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1 -- * 00-00-5E-FE-xx-xx-xx-xx -- * - value 0 -- * - XXX: already assigned to an address on the device -- */ -- if (idev->rndid[0] == 0xfd && -- (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff && -- (idev->rndid[7]&0x80)) -- goto regen; -- if ((idev->rndid[0]|idev->rndid[1]) == 0) { -- if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe) -- goto regen; -- if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) -- goto regen; -- } -- -- return 0; --} -- --static void ipv6_regen_rndid(unsigned long data) --{ -- struct inet6_dev *idev = (struct inet6_dev *) data; -- unsigned long expires; -- -- rcu_read_lock_bh(); -- write_lock_bh(&idev->lock); -- -- if (idev->dead) -- goto out; -- -- if (__ipv6_regen_rndid(idev) < 0) -- goto out; -- -- expires = jiffies + -- idev->cnf.temp_prefered_lft * HZ - -- idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor; -- if (time_before(expires, jiffies)) { -- printk(KERN_WARNING -- "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", -- idev->dev->name); -- goto out; -- } -- -- if (!mod_timer(&idev->regen_timer, expires)) -- in6_dev_hold(idev); -- --out: -- write_unlock_bh(&idev->lock); -- rcu_read_unlock_bh(); -- in6_dev_put(idev); --} -- --static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { -- int ret = 0; -- -- if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) -- ret = __ipv6_regen_rndid(idev); -- return ret; --} --#endif -- --/* -- * Add prefix route. -- */ -- --static void --addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, -- unsigned long expires, u32 flags) --{ -- struct fib6_config cfg = { -- .fc_table = RT6_TABLE_PREFIX, -- .fc_metric = IP6_RT_PRIO_ADDRCONF, -- .fc_ifindex = dev->ifindex, -- .fc_expires = expires, -- .fc_dst_len = plen, -- .fc_flags = RTF_UP | flags, -- }; -- -- ipv6_addr_copy(&cfg.fc_dst, pfx); -- -- /* Prevent useless cloning on PtP SIT. -- This thing is done here expecting that the whole -- class of non-broadcast devices need not cloning. -- */ --#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) -- if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) -- cfg.fc_flags |= RTF_NONEXTHOP; --#endif -- -- ip6_route_add(&cfg); --} -- --/* Create "default" multicast route to the interface */ -- --static void addrconf_add_mroute(struct net_device *dev) --{ -- struct fib6_config cfg = { -- .fc_table = RT6_TABLE_LOCAL, -- .fc_metric = IP6_RT_PRIO_ADDRCONF, -- .fc_ifindex = dev->ifindex, -- .fc_dst_len = 8, -- .fc_flags = RTF_UP, -- }; -- -- ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); -- -- ip6_route_add(&cfg); --} -- --#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) --static void sit_route_add(struct net_device *dev) --{ -- struct fib6_config cfg = { -- .fc_table = RT6_TABLE_MAIN, -- .fc_metric = IP6_RT_PRIO_ADDRCONF, -- .fc_ifindex = dev->ifindex, -- .fc_dst_len = 96, -- .fc_flags = RTF_UP | RTF_NONEXTHOP, -- }; -- -- /* prefix length - 96 bits "::d.d.d.d" */ -- ip6_route_add(&cfg); --} --#endif -- --static void addrconf_add_lroute(struct net_device *dev) --{ -- struct in6_addr addr; -- -- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); -- addrconf_prefix_route(&addr, 64, dev, 0, 0); --} -- --static struct inet6_dev *addrconf_add_dev(struct net_device *dev) --{ -- struct inet6_dev *idev; -- -- ASSERT_RTNL(); -- -- if ((idev = ipv6_find_idev(dev)) == NULL) -- return NULL; -- -- /* Add default multicast route */ -- addrconf_add_mroute(dev); -- -- /* Add link local route */ -- addrconf_add_lroute(dev); -- return idev; --} -- --void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) --{ -- struct prefix_info *pinfo; -- __u32 valid_lft; -- __u32 prefered_lft; -- int addr_type; -- unsigned long rt_expires; -- struct inet6_dev *in6_dev; -- -- pinfo = (struct prefix_info *) opt; -- -- if (len < sizeof(struct prefix_info)) { -- ADBG(("addrconf: prefix option too short\n")); -- return; -- } -- -- /* -- * Validation checks ([ADDRCONF], page 19) -- */ -- -- addr_type = ipv6_addr_type(&pinfo->prefix); -- -- if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)) -- return; -- -- valid_lft = ntohl(pinfo->valid); -- prefered_lft = ntohl(pinfo->prefered); -- -- if (prefered_lft > valid_lft) { -- if (net_ratelimit()) -- printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n"); -- return; -- } -- -- in6_dev = in6_dev_get(dev); -- -- if (in6_dev == NULL) { -- if (net_ratelimit()) -- printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); -- return; -- } -- -- /* -- * Two things going on here: -- * 1) Add routes for on-link prefixes -- * 2) Configure prefixes with the auto flag set -- */ -- -- /* Avoid arithmetic overflow. Really, we could -- save rt_expires in seconds, likely valid_lft, -- but it would require division in fib gc, that it -- not good. -- */ -- if (valid_lft >= 0x7FFFFFFF/HZ) -- rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); -- else -- rt_expires = valid_lft * HZ; -- -- /* -- * We convert this (in jiffies) to clock_t later. -- * Avoid arithmetic overflow there as well. -- * Overflow can happen only if HZ < USER_HZ. -- */ -- if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) -- rt_expires = 0x7FFFFFFF / USER_HZ; -- -- if (pinfo->onlink) { -- struct rt6_info *rt; -- rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1); -- -- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { -- if (rt->rt6i_flags&RTF_EXPIRES) { -- if (valid_lft == 0) { -- ip6_del_rt(rt); -- rt = NULL; -- } else { -- rt->rt6i_expires = jiffies + rt_expires; -- } -- } -- } else if (valid_lft) { -- addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, -- dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); -- } -- if (rt) -- dst_release(&rt->u.dst); -- } -- -- /* Try to figure out our local address for this prefix */ -- -- if (pinfo->autoconf && in6_dev->cnf.autoconf) { -- struct inet6_ifaddr * ifp; -- struct in6_addr addr; -- int create = 0, update_lft = 0; -- -- if (pinfo->prefix_len == 64) { -- memcpy(&addr, &pinfo->prefix, 8); -- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && -- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { -- in6_dev_put(in6_dev); -- return; -- } -- goto ok; -- } -- if (net_ratelimit()) -- printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n", -- pinfo->prefix_len); -- in6_dev_put(in6_dev); -- return; -- --ok: -- -- ifp = ipv6_get_ifaddr(&addr, dev, 1); -- -- if (ifp == NULL && valid_lft) { -- int max_addresses = in6_dev->cnf.max_addresses; -- u32 addr_flags = 0; -- --#ifdef CONFIG_IPV6_OPTIMISTIC_DAD -- if (in6_dev->cnf.optimistic_dad && -- !ipv6_devconf.forwarding) -- addr_flags = IFA_F_OPTIMISTIC; --#endif -- -- /* Do not allow to create too much of autoconfigured -- * addresses; this would be too easy way to crash kernel. -- */ -- if (!max_addresses || -- ipv6_count_addresses(in6_dev) < max_addresses) -- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, -- addr_type&IPV6_ADDR_SCOPE_MASK, -- addr_flags); -- -- if (!ifp || IS_ERR(ifp)) { -- in6_dev_put(in6_dev); -- return; -- } -- -- update_lft = create = 1; -- ifp->cstamp = jiffies; -- addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT); -- } -- -- if (ifp) { -- int flags; -- unsigned long now; --#ifdef CONFIG_IPV6_PRIVACY -- struct inet6_ifaddr *ift; --#endif -- u32 stored_lft; -- -- /* update lifetime (RFC2462 5.5.3 e) */ -- spin_lock(&ifp->lock); -- now = jiffies; -- if (ifp->valid_lft > (now - ifp->tstamp) / HZ) -- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; -- else -- stored_lft = 0; -- if (!update_lft && stored_lft) { -- if (valid_lft > MIN_VALID_LIFETIME || -- valid_lft > stored_lft) -- update_lft = 1; -- else if (stored_lft <= MIN_VALID_LIFETIME) { -- /* valid_lft <= stored_lft is always true */ -- /* XXX: IPsec */ -- update_lft = 0; -- } else { -- valid_lft = MIN_VALID_LIFETIME; -- if (valid_lft < prefered_lft) -- prefered_lft = valid_lft; -- update_lft = 1; -- } -- } -- -- if (update_lft) { -- ifp->valid_lft = valid_lft; -- ifp->prefered_lft = prefered_lft; -- ifp->tstamp = now; -- flags = ifp->flags; -- ifp->flags &= ~IFA_F_DEPRECATED; -- spin_unlock(&ifp->lock); -- -- if (!(flags&IFA_F_TENTATIVE)) -- ipv6_ifa_notify(0, ifp); -- } else -- spin_unlock(&ifp->lock); -- --#ifdef CONFIG_IPV6_PRIVACY -- read_lock_bh(&in6_dev->lock); -- /* update all temporary addresses in the list */ -- for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { -- /* -- * When adjusting the lifetimes of an existing -- * temporary address, only lower the lifetimes. -- * Implementations must not increase the -- * lifetimes of an existing temporary address -- * when processing a Prefix Information Option. -- */ -- spin_lock(&ift->lock); -- flags = ift->flags; -- if (ift->valid_lft > valid_lft && -- ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ) -- ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ; -- if (ift->prefered_lft > prefered_lft && -- ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ) -- ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ; -- spin_unlock(&ift->lock); -- if (!(flags&IFA_F_TENTATIVE)) -- ipv6_ifa_notify(0, ift); -- } -- -- if (create && in6_dev->cnf.use_tempaddr > 0) { -- /* -- * When a new public address is created as described in [ADDRCONF], -- * also create a new temporary address. -- */ -- read_unlock_bh(&in6_dev->lock); -- ipv6_create_tempaddr(ifp, NULL); -- } else { -- read_unlock_bh(&in6_dev->lock); -- } --#endif -- in6_ifa_put(ifp); -- addrconf_verify(0); -- } -- } -- inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); -- in6_dev_put(in6_dev); --} -- --/* -- * Set destination address. -- * Special case for SIT interfaces where we create a new "virtual" -- * device. -- */ --int addrconf_set_dstaddr(void __user *arg) --{ -- struct in6_ifreq ireq; -- struct net_device *dev; -- int err = -EINVAL; -- -- rtnl_lock(); -- -- err = -EFAULT; -- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) -- goto err_exit; -- -- dev = __dev_get_by_index(ireq.ifr6_ifindex); -- -- err = -ENODEV; -- if (dev == NULL) -- goto err_exit; -- --#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) -- if (dev->type == ARPHRD_SIT) { -- struct ifreq ifr; -- mm_segment_t oldfs; -- struct ip_tunnel_parm p; -- -- err = -EADDRNOTAVAIL; -- if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) -- goto err_exit; -- -- memset(&p, 0, sizeof(p)); -- p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; -- p.iph.saddr = 0; -- p.iph.version = 4; -- p.iph.ihl = 5; -- p.iph.protocol = IPPROTO_IPV6; -- p.iph.ttl = 64; -- ifr.ifr_ifru.ifru_data = (void __user *)&p; -- -- oldfs = get_fs(); set_fs(KERNEL_DS); -- err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); -- set_fs(oldfs); -- -- if (err == 0) { -- err = -ENOBUFS; -- if ((dev = __dev_get_by_name(p.name)) == NULL) -- goto err_exit; -- err = dev_open(dev); -- } -- } --#endif -- --err_exit: -- rtnl_unlock(); -- return err; --} -- --/* -- * Manual configuration of address on an interface -- */ --static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, -- __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) --{ -- struct inet6_ifaddr *ifp; -- struct inet6_dev *idev; -- struct net_device *dev; -- int scope; -- u32 flags = RTF_EXPIRES; -- -- ASSERT_RTNL(); -- -- /* check the lifetime */ -- if (!valid_lft || prefered_lft > valid_lft) -- return -EINVAL; -- -- if ((dev = __dev_get_by_index(ifindex)) == NULL) -- return -ENODEV; -- -- if ((idev = addrconf_add_dev(dev)) == NULL) -- return -ENOBUFS; -- -- scope = ipv6_addr_scope(pfx); -- -- if (valid_lft == INFINITY_LIFE_TIME) { -- ifa_flags |= IFA_F_PERMANENT; -- flags = 0; -- } else if (valid_lft >= 0x7FFFFFFF/HZ) -- valid_lft = 0x7FFFFFFF/HZ; -- -- if (prefered_lft == 0) -- ifa_flags |= IFA_F_DEPRECATED; -- else if ((prefered_lft >= 0x7FFFFFFF/HZ) && -- (prefered_lft != INFINITY_LIFE_TIME)) -- prefered_lft = 0x7FFFFFFF/HZ; -- -- ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); -- -- if (!IS_ERR(ifp)) { -- spin_lock_bh(&ifp->lock); -- ifp->valid_lft = valid_lft; -- ifp->prefered_lft = prefered_lft; -- ifp->tstamp = jiffies; -- spin_unlock_bh(&ifp->lock); -- -- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, -- jiffies_to_clock_t(valid_lft * HZ), flags); -- /* -- * Note that section 3.1 of RFC 4429 indicates -- * that the Optimistic flag should not be set for -- * manually configured addresses -- */ -- addrconf_dad_start(ifp, 0); -- in6_ifa_put(ifp); -- addrconf_verify(0); -- return 0; -- } -- -- return PTR_ERR(ifp); --} -- --static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) --{ -- struct inet6_ifaddr *ifp; -- struct inet6_dev *idev; -- struct net_device *dev; -- -- if ((dev = __dev_get_by_index(ifindex)) == NULL) -- return -ENODEV; -- -- if ((idev = __in6_dev_get(dev)) == NULL) -- return -ENXIO; -- -- read_lock_bh(&idev->lock); -- for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { -- if (ifp->prefix_len == plen && -- ipv6_addr_equal(pfx, &ifp->addr)) { -- in6_ifa_hold(ifp); -- read_unlock_bh(&idev->lock); -- -- ipv6_del_addr(ifp); -- -- /* If the last address is deleted administratively, -- disable IPv6 on this interface. -- */ -- if (idev->addr_list == NULL) -- addrconf_ifdown(idev->dev, 1); -- return 0; -- } -- } -- read_unlock_bh(&idev->lock); -- return -EADDRNOTAVAIL; --} -- -- --int addrconf_add_ifaddr(void __user *arg) --{ -- struct in6_ifreq ireq; -- int err; -- -- if (!capable(CAP_NET_ADMIN)) -- return -EPERM; -- -- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) -- return -EFAULT; -- -- rtnl_lock(); -- err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, -- IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); -- rtnl_unlock(); -- return err; --} -- --int addrconf_del_ifaddr(void __user *arg) --{ -- struct in6_ifreq ireq; -- int err; -- -- if (!capable(CAP_NET_ADMIN)) -- return -EPERM; -- -- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) -- return -EFAULT; -- -- rtnl_lock(); -- err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); -- rtnl_unlock(); -- return err; --} -- --#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) --static void sit_add_v4_addrs(struct inet6_dev *idev) --{ -- struct inet6_ifaddr * ifp; -- struct in6_addr addr; -- struct net_device *dev; -- int scope; -- -- ASSERT_RTNL(); -- -- memset(&addr, 0, sizeof(struct in6_addr)); -- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); -- -- if (idev->dev->flags&IFF_POINTOPOINT) { -- addr.s6_addr32[0] = htonl(0xfe800000); -- scope = IFA_LINK; -- } else { -- scope = IPV6_ADDR_COMPATv4; -- } -- -- if (addr.s6_addr32[3]) { -- ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT); -- if (!IS_ERR(ifp)) { -- spin_lock_bh(&ifp->lock); -- ifp->flags &= ~IFA_F_TENTATIVE; -- spin_unlock_bh(&ifp->lock); -- ipv6_ifa_notify(RTM_NEWADDR, ifp); -- in6_ifa_put(ifp); -- } -- return; -- } -- -- for_each_netdev(dev) { -- struct in_device * in_dev = __in_dev_get_rtnl(dev); -- if (in_dev && (dev->flags & IFF_UP)) { -- struct in_ifaddr * ifa; -- -- int flag = scope; -- -- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { -- int plen; -- -- addr.s6_addr32[3] = ifa->ifa_local; -- -- if (ifa->ifa_scope == RT_SCOPE_LINK) -- continue; -- if (ifa->ifa_scope >= RT_SCOPE_HOST) { -- if (idev->dev->flags&IFF_POINTOPOINT) -- continue; -- flag |= IFA_HOST; -- } -- if (idev->dev->flags&IFF_POINTOPOINT) -- plen = 64; -- else -- plen = 96; -- -- ifp = ipv6_add_addr(idev, &addr, plen, flag, -- IFA_F_PERMANENT); -- if (!IS_ERR(ifp)) { -- spin_lock_bh(&ifp->lock); -- ifp->flags &= ~IFA_F_TENTATIVE; -- spin_unlock_bh(&ifp->lock); -- ipv6_ifa_notify(RTM_NEWADDR, ifp); -- in6_ifa_put(ifp); -- } -- } -- } -- } --} --#endif -- --static void init_loopback(struct net_device *dev) --{ -- struct inet6_dev *idev; -- struct inet6_ifaddr * ifp; -- -- /* ::1 */ -- -- ASSERT_RTNL(); -- -- if ((idev = ipv6_find_idev(dev)) == NULL) { -- printk(KERN_DEBUG "init loopback: add_dev failed\n"); -- return; -- } -- -- ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT); -- if (!IS_ERR(ifp)) { -- spin_lock_bh(&ifp->lock); -- ifp->flags &= ~IFA_F_TENTATIVE; -- spin_unlock_bh(&ifp->lock); -- ipv6_ifa_notify(RTM_NEWADDR, ifp); -- in6_ifa_put(ifp); -- } --} -- --static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) --{ -- struct inet6_ifaddr * ifp; -- u32 addr_flags = IFA_F_PERMANENT; -- --#ifdef CONFIG_IPV6_OPTIMISTIC_DAD -- if (idev->cnf.optimistic_dad && -- !ipv6_devconf.forwarding) -- addr_flags |= IFA_F_OPTIMISTIC; --#endif -- -- -- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); -- if (!IS_ERR(ifp)) { -- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); -- addrconf_dad_start(ifp, 0); -- in6_ifa_put(ifp); -- } --} -- --static void addrconf_dev_config(struct net_device *dev) --{ -- struct in6_addr addr; -- struct inet6_dev * idev; -- -- ASSERT_RTNL(); -- -- if ((dev->type != ARPHRD_ETHER) && -- (dev->type != ARPHRD_FDDI) && -- (dev->type != ARPHRD_IEEE802_TR) && -- (dev->type != ARPHRD_ARCNET) && -- (dev->type != ARPHRD_INFINIBAND)) { -- /* Alas, we support only Ethernet autoconfiguration. */ -- return; -- } -- -- idev = addrconf_add_dev(dev); -- if (idev == NULL) -- return; -- -- memset(&addr, 0, sizeof(struct in6_addr)); -- addr.s6_addr32[0] = htonl(0xFE800000); -- -- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) -- addrconf_add_linklocal(idev, &addr); --} -- --#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) --static void addrconf_sit_config(struct net_device *dev) --{ -- struct inet6_dev *idev; -- -- ASSERT_RTNL(); -- -- /* -- * Configure the tunnel with one of our IPv4 -- * addresses... we should configure all of -- * our v4 addrs in the tunnel -- */ -- -- if ((idev = ipv6_find_idev(dev)) == NULL) { -- printk(KERN_DEBUG "init sit: add_dev failed\n"); -- return; -- } -- -- sit_add_v4_addrs(idev); -- -- if (dev->flags&IFF_POINTOPOINT) { -- addrconf_add_mroute(dev); -- addrconf_add_lroute(dev); -- } else -- sit_route_add(dev); --} --#endif -- --static inline int --ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) --{ -- struct in6_addr lladdr; -- -- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { -- addrconf_add_linklocal(idev, &lladdr); -- return 0; -- } -- return -1; --} -- --static void ip6_tnl_add_linklocal(struct inet6_dev *idev) --{ -- struct net_device *link_dev; -- -- /* first try to inherit the link-local address from the link device */ -- if (idev->dev->iflink && -- (link_dev = __dev_get_by_index(idev->dev->iflink))) { -- if (!ipv6_inherit_linklocal(idev, link_dev)) -- return; -- } -- /* then try to inherit it from any device */ -- for_each_netdev(link_dev) { -- if (!ipv6_inherit_linklocal(idev, link_dev)) -- return; -- } -- printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n"); --} -- --/* -- * Autoconfigure tunnel with a link-local address so routing protocols, -- * DHCPv6, MLD etc. can be run over the virtual link -- */ -- --static void addrconf_ip6_tnl_config(struct net_device *dev) --{ -- struct inet6_dev *idev; -- -- ASSERT_RTNL(); -- -- if ((idev = addrconf_add_dev(dev)) == NULL) { -- printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); -- return; -- } -- ip6_tnl_add_linklocal(idev); --} -- --static int addrconf_notify(struct notifier_block *this, unsigned long event, -- void * data) --{ -- struct net_device *dev = (struct net_device *) data; -- struct inet6_dev *idev = __in6_dev_get(dev); -- int run_pending = 0; -- -- switch(event) { -- case NETDEV_REGISTER: -- if (!idev && dev->mtu >= IPV6_MIN_MTU) { -- idev = ipv6_add_dev(dev); -- if (!idev) -- printk(KERN_WARNING "IPv6: add_dev failed for %s\n", -- dev->name); -- } -- break; -- case NETDEV_UP: -- case NETDEV_CHANGE: -- if (event == NETDEV_UP) { -- if (!addrconf_qdisc_ok(dev)) { -- /* device is not ready yet. */ -- printk(KERN_INFO -- "ADDRCONF(NETDEV_UP): %s: " -- "link is not ready\n", -- dev->name); -- break; -- } -- -- if (idev) -- idev->if_flags |= IF_READY; -- } else { -- if (!addrconf_qdisc_ok(dev)) { -- /* device is still not ready. */ -- break; -- } -- -- if (idev) { -- if (idev->if_flags & IF_READY) { -- /* device is already configured. */ -- break; -- } -- idev->if_flags |= IF_READY; -- } -- -- printk(KERN_INFO -- "ADDRCONF(NETDEV_CHANGE): %s: " -- "link becomes ready\n", -- dev->name); -- -- run_pending = 1; -- } -- -- switch(dev->type) { --#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) -- case ARPHRD_SIT: -- addrconf_sit_config(dev); -- break; --#endif -- case ARPHRD_TUNNEL6: -- addrconf_ip6_tnl_config(dev); -- break; -- case ARPHRD_LOOPBACK: -- init_loopback(dev); -- break; -- -- default: -- addrconf_dev_config(dev); -- break; -- } -- if (idev) { -- if (run_pending) -- addrconf_dad_run(idev); -- -- /* If the MTU changed during the interface down, when the -- interface up, the changed MTU must be reflected in the -- idev as well as routers. -- */ -- if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { -- rt6_mtu_change(dev, dev->mtu); -- idev->cnf.mtu6 = dev->mtu; -- } -- idev->tstamp = jiffies; -- inet6_ifinfo_notify(RTM_NEWLINK, idev); -- /* If the changed mtu during down is lower than IPV6_MIN_MTU -- stop IPv6 on this interface. -- */ -- if (dev->mtu < IPV6_MIN_MTU) -- addrconf_ifdown(dev, event != NETDEV_DOWN); -- } -- break; -- -- case NETDEV_CHANGEMTU: -- if ( idev && dev->mtu >= IPV6_MIN_MTU) { -- rt6_mtu_change(dev, dev->mtu); -- idev->cnf.mtu6 = dev->mtu; -- break; -- } -- -- /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ -- -- case NETDEV_DOWN: -- case NETDEV_UNREGISTER: -- /* -- * Remove all addresses from this interface. -- */ -- addrconf_ifdown(dev, event != NETDEV_DOWN); -- break; -- -- case NETDEV_CHANGENAME: -- if (idev) { -- snmp6_unregister_dev(idev); --#ifdef CONFIG_SYSCTL -- addrconf_sysctl_unregister(&idev->cnf); -- neigh_sysctl_unregister(idev->nd_parms); -- neigh_sysctl_register(dev, idev->nd_parms, -- NET_IPV6, NET_IPV6_NEIGH, "ipv6", -- &ndisc_ifinfo_sysctl_change, -- NULL); -- addrconf_sysctl_register(idev, &idev->cnf); --#endif -- snmp6_register_dev(idev); -- } -- break; -- } -- -- return NOTIFY_OK; --} -- --/* -- * addrconf module should be notified of a device going up -- */ --static struct notifier_block ipv6_dev_notf = { -- .notifier_call = addrconf_notify, -- .priority = 0 --}; -- --static int addrconf_ifdown(struct net_device *dev, int how) --{ -- struct inet6_dev *idev; -- struct inet6_ifaddr *ifa, **bifa; -- int i; -- -- ASSERT_RTNL(); -- ++ for_each_netdev(&init_net, dev) { + struct in_device * in_dev = __in_dev_get_rtnl(dev); + if (in_dev && (dev->flags & IFF_UP)) { + struct in_ifaddr * ifa; +@@ -2245,12 +2246,12 @@ + + /* first try to inherit the link-local address from the link device */ + if (idev->dev->iflink && +- (link_dev = __dev_get_by_index(idev->dev->iflink))) { ++ (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { + if (!ipv6_inherit_linklocal(idev, link_dev)) + return; + } + /* then try to inherit it from any device */ +- for_each_netdev(link_dev) { ++ for_each_netdev(&init_net, link_dev) { + if (!ipv6_inherit_linklocal(idev, link_dev)) + return; + } +@@ -2282,6 +2283,9 @@ + struct inet6_dev *idev = __in6_dev_get(dev); + int run_pending = 0; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch(event) { + case NETDEV_REGISTER: + if (!idev && dev->mtu >= IPV6_MIN_MTU) { +@@ -2419,7 +2423,7 @@ + + ASSERT_RTNL(); + - if (dev == &loopback_dev && how == 1) -- how = 0; -- -- rt6_ifdown(dev); -- neigh_ifdown(&nd_tbl, dev); -- -- idev = __in6_dev_get(dev); -- if (idev == NULL) -- return -ENODEV; -- -- /* Step 1: remove reference to ipv6 device from parent device. -- Do not dev_put! -- */ -- if (how == 1) { -- idev->dead = 1; -- -- /* protected by rtnl_lock */ -- rcu_assign_pointer(dev->ip6_ptr, NULL); -- -- /* Step 1.5: remove snmp6 entry */ -- snmp6_unregister_dev(idev); -- -- } -- -- /* Step 2: clear hash table */ -- for (i=0; iidev == idev) { -- *bifa = ifa->lst_next; -- ifa->lst_next = NULL; -- addrconf_del_timer(ifa); -- in6_ifa_put(ifa); -- continue; -- } -- bifa = &ifa->lst_next; -- } -- write_unlock_bh(&addrconf_hash_lock); -- } -- -- write_lock_bh(&idev->lock); -- -- /* Step 3: clear flags for stateless addrconf */ -- if (how != 1) -- idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); -- -- /* Step 4: clear address list */ --#ifdef CONFIG_IPV6_PRIVACY -- if (how == 1 && del_timer(&idev->regen_timer)) -- in6_dev_put(idev); -- -- /* clear tempaddr list */ -- while ((ifa = idev->tempaddr_list) != NULL) { -- idev->tempaddr_list = ifa->tmp_next; -- ifa->tmp_next = NULL; -- ifa->dead = 1; -- write_unlock_bh(&idev->lock); -- spin_lock_bh(&ifa->lock); -- -- if (ifa->ifpub) { -- in6_ifa_put(ifa->ifpub); -- ifa->ifpub = NULL; -- } -- spin_unlock_bh(&ifa->lock); -- in6_ifa_put(ifa); -- write_lock_bh(&idev->lock); -- } --#endif -- while ((ifa = idev->addr_list) != NULL) { -- idev->addr_list = ifa->if_next; -- ifa->if_next = NULL; -- ifa->dead = 1; -- addrconf_del_timer(ifa); -- write_unlock_bh(&idev->lock); -- -- __ipv6_ifa_notify(RTM_DELADDR, ifa); -- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); -- in6_ifa_put(ifa); -- -- write_lock_bh(&idev->lock); -- } -- write_unlock_bh(&idev->lock); -- -- /* Step 5: Discard multicast list */ -- -- if (how == 1) -- ipv6_mc_destroy_dev(idev); -- else -- ipv6_mc_down(idev); -- -- /* Step 5: netlink notification of this interface */ -- idev->tstamp = jiffies; -- inet6_ifinfo_notify(RTM_DELLINK, idev); -- -- /* Shot the device (if unregistered) */ -- -- if (how == 1) { --#ifdef CONFIG_SYSCTL -- addrconf_sysctl_unregister(&idev->cnf); -- neigh_sysctl_unregister(idev->nd_parms); --#endif -- neigh_parms_release(&nd_tbl, idev->nd_parms); -- neigh_ifdown(&nd_tbl, dev); -- in6_dev_put(idev); -- } -- return 0; --} -- --static void addrconf_rs_timer(unsigned long data) --{ -- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; -- -- if (ifp->idev->cnf.forwarding) -- goto out; -- -- if (ifp->idev->if_flags & IF_RA_RCVD) { -- /* -- * Announcement received after solicitation -- * was sent -- */ -- goto out; -- } -- -- spin_lock(&ifp->lock); -- if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) { -- struct in6_addr all_routers; -- -- /* The wait after the last probe can be shorter */ -- addrconf_mod_timer(ifp, AC_RS, -- (ifp->probes == ifp->idev->cnf.rtr_solicits) ? -- ifp->idev->cnf.rtr_solicit_delay : -- ifp->idev->cnf.rtr_solicit_interval); -- spin_unlock(&ifp->lock); -- -- ipv6_addr_all_routers(&all_routers); -- -- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); -- } else { -- spin_unlock(&ifp->lock); -- /* -- * Note: we do not support deprecated "all on-link" -- * assumption any longer. -- */ -- printk(KERN_DEBUG "%s: no IPv6 routers present\n", -- ifp->idev->dev->name); -- } -- --out: -- in6_ifa_put(ifp); --} -- --/* -- * Duplicate Address Detection -- */ --static void addrconf_dad_kick(struct inet6_ifaddr *ifp) --{ -- unsigned long rand_num; -- struct inet6_dev *idev = ifp->idev; -- -- if (ifp->flags & IFA_F_OPTIMISTIC) -- rand_num = 0; -- else -- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); -- -- ifp->probes = idev->cnf.dad_transmits; -- addrconf_mod_timer(ifp, AC_DAD, rand_num); --} -- --static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) --{ -- struct inet6_dev *idev = ifp->idev; -- struct net_device *dev = idev->dev; -- -- addrconf_join_solict(dev, &ifp->addr); -- -- net_srandom(ifp->addr.s6_addr32[3]); -- -- read_lock_bh(&idev->lock); -- if (ifp->dead) -- goto out; -- spin_lock_bh(&ifp->lock); -- -- if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || -- !(ifp->flags&IFA_F_TENTATIVE) || -- ifp->flags & IFA_F_NODAD) { -- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); -- spin_unlock_bh(&ifp->lock); -- read_unlock_bh(&idev->lock); -- -- addrconf_dad_completed(ifp); -- return; -- } -- -- if (!(idev->if_flags & IF_READY)) { -- spin_unlock_bh(&ifp->lock); -- read_unlock_bh(&idev->lock); -- /* -- * If the defice is not ready: -- * - keep it tentative if it is a permanent address. -- * - otherwise, kill it. -- */ -- in6_ifa_hold(ifp); -- addrconf_dad_stop(ifp); -- return; -- } -- -- /* -- * Optimistic nodes can start receiving -- * Frames right away -- */ -- if(ifp->flags & IFA_F_OPTIMISTIC) -- ip6_ins_rt(ifp->rt); -- -- addrconf_dad_kick(ifp); -- spin_unlock_bh(&ifp->lock); --out: -- read_unlock_bh(&idev->lock); --} -- --static void addrconf_dad_timer(unsigned long data) --{ -- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; -- struct inet6_dev *idev = ifp->idev; -- struct in6_addr unspec; -- struct in6_addr mcaddr; -- -- read_lock_bh(&idev->lock); -- if (idev->dead) { -- read_unlock_bh(&idev->lock); -- goto out; -- } -- spin_lock_bh(&ifp->lock); -- if (ifp->probes == 0) { -- /* -- * DAD was successful -- */ -- -- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); -- spin_unlock_bh(&ifp->lock); -- read_unlock_bh(&idev->lock); -- -- addrconf_dad_completed(ifp); -- -- goto out; -- } -- -- ifp->probes--; -- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); -- spin_unlock_bh(&ifp->lock); -- read_unlock_bh(&idev->lock); -- -- /* send a neighbour solicitation for our addr */ -- memset(&unspec, 0, sizeof(unspec)); -- addrconf_addr_solict_mult(&ifp->addr, &mcaddr); -- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); --out: -- in6_ifa_put(ifp); --} -- --static void addrconf_dad_completed(struct inet6_ifaddr *ifp) --{ -- struct net_device * dev = ifp->idev->dev; -- -- /* -- * Configure the address for reception. Now it is valid. -- */ -- -- ipv6_ifa_notify(RTM_NEWADDR, ifp); -- -- /* If added prefix is link local and forwarding is off, -- start sending router solicitations. -- */ -- -- if (ifp->idev->cnf.forwarding == 0 && -- ifp->idev->cnf.rtr_solicits > 0 && -- (dev->flags&IFF_LOOPBACK) == 0 && -- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { -- struct in6_addr all_routers; -- -- ipv6_addr_all_routers(&all_routers); -- -- /* -- * If a host as already performed a random delay -- * [...] as part of DAD [...] there is no need -- * to delay again before sending the first RS -- */ -- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); -- -- spin_lock_bh(&ifp->lock); -- ifp->probes = 1; -- ifp->idev->if_flags |= IF_RS_SENT; -- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); -- spin_unlock_bh(&ifp->lock); -- } --} -- --static void addrconf_dad_run(struct inet6_dev *idev) { -- struct inet6_ifaddr *ifp; -- -- read_lock_bh(&idev->lock); -- for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { -- spin_lock_bh(&ifp->lock); -- if (!(ifp->flags & IFA_F_TENTATIVE)) { -- spin_unlock_bh(&ifp->lock); -- continue; -- } -- spin_unlock_bh(&ifp->lock); -- addrconf_dad_kick(ifp); -- } -- read_unlock_bh(&idev->lock); --} -- --#ifdef CONFIG_PROC_FS --struct if6_iter_state { -- int bucket; --}; -- --static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) --{ -- struct inet6_ifaddr *ifa = NULL; -- struct if6_iter_state *state = seq->private; -- -- for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { -- ifa = inet6_addr_lst[state->bucket]; -- if (ifa) -- break; -- } -- return ifa; --} -- --static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) --{ -- struct if6_iter_state *state = seq->private; -- -- ifa = ifa->lst_next; --try_again: -- if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { -- ifa = inet6_addr_lst[state->bucket]; -- goto try_again; -- } -- return ifa; --} -- --static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) --{ -- struct inet6_ifaddr *ifa = if6_get_first(seq); -- -- if (ifa) -- while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) -- --pos; -- return pos ? NULL : ifa; --} -- --static void *if6_seq_start(struct seq_file *seq, loff_t *pos) --{ -- read_lock_bh(&addrconf_hash_lock); -- return if6_get_idx(seq, *pos); --} -- --static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) --{ -- struct inet6_ifaddr *ifa; -- -- ifa = if6_get_next(seq, v); -- ++*pos; -- return ifa; --} -- --static void if6_seq_stop(struct seq_file *seq, void *v) --{ -- read_unlock_bh(&addrconf_hash_lock); --} -- --static int if6_seq_show(struct seq_file *seq, void *v) --{ -- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; -- seq_printf(seq, -- NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", -- NIP6(ifp->addr), -- ifp->idev->dev->ifindex, -- ifp->prefix_len, -- ifp->scope, -- ifp->flags, -- ifp->idev->dev->name); -- return 0; --} -- --static struct seq_operations if6_seq_ops = { -- .start = if6_seq_start, -- .next = if6_seq_next, -- .show = if6_seq_show, -- .stop = if6_seq_stop, --}; -- --static int if6_seq_open(struct inode *inode, struct file *file) --{ -- struct seq_file *seq; -- int rc = -ENOMEM; -- struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); -- -- if (!s) -- goto out; -- -- rc = seq_open(file, &if6_seq_ops); -- if (rc) -- goto out_kfree; -- -- seq = file->private_data; -- seq->private = s; --out: -- return rc; --out_kfree: -- kfree(s); -- goto out; --} -- --static const struct file_operations if6_fops = { -- .owner = THIS_MODULE, -- .open = if6_seq_open, -- .read = seq_read, -- .llseek = seq_lseek, -- .release = seq_release_private, --}; -- --int __init if6_proc_init(void) --{ ++ if (dev == &init_net.loopback_dev && how == 1) + how = 0; + + rt6_ifdown(dev); +@@ -2850,18 +2854,18 @@ + + int __init if6_proc_init(void) + { - if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) -- return -ENOMEM; -- return 0; --} -- --void if6_proc_exit(void) --{ ++ if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) + return -ENOMEM; + return 0; + } + + void if6_proc_exit(void) + { - proc_net_remove("if_inet6"); --} --#endif /* CONFIG_PROC_FS */ -- ++ proc_net_remove(&init_net, "if_inet6"); + } + #endif /* CONFIG_PROC_FS */ + -#ifdef CONFIG_IPV6_MIP6 --/* Check if address is a home address configured on any interface. */ --int ipv6_chk_home_addr(struct in6_addr *addr) --{ -- int ret = 0; -- struct inet6_ifaddr * ifp; -- u8 hash = ipv6_addr_hash(addr); -- read_lock_bh(&addrconf_hash_lock); -- for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { -- if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && -- (ifp->flags & IFA_F_HOMEADDRESS)) { -- ret = 1; -- break; -- } -- } -- read_unlock_bh(&addrconf_hash_lock); -- return ret; --} --#endif -- --/* -- * Periodic address status verification -- */ -- --static void addrconf_verify(unsigned long foo) --{ -- struct inet6_ifaddr *ifp; -- unsigned long now, next; -- int i; -- -- spin_lock_bh(&addrconf_verify_lock); -- now = jiffies; -- next = now + ADDR_CHECK_FREQUENCY; -- -- del_timer(&addr_chk_timer); -- -- for (i=0; i < IN6_ADDR_HSIZE; i++) { -- --restart: -- read_lock(&addrconf_hash_lock); -- for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { -- unsigned long age; --#ifdef CONFIG_IPV6_PRIVACY -- unsigned long regen_advance; --#endif -- -- if (ifp->flags & IFA_F_PERMANENT) -- continue; -- -- spin_lock(&ifp->lock); -- age = (now - ifp->tstamp) / HZ; -- --#ifdef CONFIG_IPV6_PRIVACY -- regen_advance = ifp->idev->cnf.regen_max_retry * -- ifp->idev->cnf.dad_transmits * -- ifp->idev->nd_parms->retrans_time / HZ; --#endif -- -- if (ifp->valid_lft != INFINITY_LIFE_TIME && -- age >= ifp->valid_lft) { -- spin_unlock(&ifp->lock); -- in6_ifa_hold(ifp); -- read_unlock(&addrconf_hash_lock); -- ipv6_del_addr(ifp); -- goto restart; -- } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { -- spin_unlock(&ifp->lock); -- continue; -- } else if (age >= ifp->prefered_lft) { -- /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ -- int deprecate = 0; -- -- if (!(ifp->flags&IFA_F_DEPRECATED)) { -- deprecate = 1; -- ifp->flags |= IFA_F_DEPRECATED; -- } -- -- if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) -- next = ifp->tstamp + ifp->valid_lft * HZ; -- -- spin_unlock(&ifp->lock); -- -- if (deprecate) { -- in6_ifa_hold(ifp); -- read_unlock(&addrconf_hash_lock); -- -- ipv6_ifa_notify(0, ifp); -- in6_ifa_put(ifp); -- goto restart; -- } --#ifdef CONFIG_IPV6_PRIVACY -- } else if ((ifp->flags&IFA_F_TEMPORARY) && -- !(ifp->flags&IFA_F_TENTATIVE)) { -- if (age >= ifp->prefered_lft - regen_advance) { -- struct inet6_ifaddr *ifpub = ifp->ifpub; -- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) -- next = ifp->tstamp + ifp->prefered_lft * HZ; -- if (!ifp->regen_count && ifpub) { -- ifp->regen_count++; -- in6_ifa_hold(ifp); -- in6_ifa_hold(ifpub); -- spin_unlock(&ifp->lock); -- read_unlock(&addrconf_hash_lock); -- spin_lock(&ifpub->lock); -- ifpub->regen_count = 0; -- spin_unlock(&ifpub->lock); -- ipv6_create_tempaddr(ifpub, ifp); -- in6_ifa_put(ifpub); -- in6_ifa_put(ifp); -- goto restart; -- } -- } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) -- next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; -- spin_unlock(&ifp->lock); --#endif -- } else { -- /* ifp->prefered_lft <= ifp->valid_lft */ -- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) -- next = ifp->tstamp + ifp->prefered_lft * HZ; -- spin_unlock(&ifp->lock); -- } -- } -- read_unlock(&addrconf_hash_lock); -- } -- -- addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; -- add_timer(&addr_chk_timer); -- spin_unlock_bh(&addrconf_verify_lock); --} -- --static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) --{ -- struct in6_addr *pfx = NULL; -- -- if (addr) -- pfx = nla_data(addr); -- -- if (local) { -- if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) -- pfx = NULL; -- else -- pfx = nla_data(local); -- } -- -- return pfx; --} -- --static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { -- [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, -- [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, -- [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, --}; -- --static int --inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) --{ -- struct ifaddrmsg *ifm; -- struct nlattr *tb[IFA_MAX+1]; -- struct in6_addr *pfx; -- int err; -- -- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); -- if (err < 0) -- return err; -- -- ifm = nlmsg_data(nlh); -- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); -- if (pfx == NULL) -- return -EINVAL; -- -- return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); --} -- --static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, -- u32 prefered_lft, u32 valid_lft) --{ -- u32 flags = RTF_EXPIRES; -- -- if (!valid_lft || (prefered_lft > valid_lft)) -- return -EINVAL; -- -- if (valid_lft == INFINITY_LIFE_TIME) { -- ifa_flags |= IFA_F_PERMANENT; -- flags = 0; -- } else if (valid_lft >= 0x7FFFFFFF/HZ) -- valid_lft = 0x7FFFFFFF/HZ; -- -- if (prefered_lft == 0) -- ifa_flags |= IFA_F_DEPRECATED; -- else if ((prefered_lft >= 0x7FFFFFFF/HZ) && -- (prefered_lft != INFINITY_LIFE_TIME)) -- prefered_lft = 0x7FFFFFFF/HZ; -- -- spin_lock_bh(&ifp->lock); -- ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; -- ifp->tstamp = jiffies; -- ifp->valid_lft = valid_lft; -- ifp->prefered_lft = prefered_lft; -- -- spin_unlock_bh(&ifp->lock); -- if (!(ifp->flags&IFA_F_TENTATIVE)) -- ipv6_ifa_notify(0, ifp); -- -- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, -- jiffies_to_clock_t(valid_lft * HZ), flags); -- addrconf_verify(0); -- -- return 0; --} -- --static int --inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) --{ -- struct ifaddrmsg *ifm; -- struct nlattr *tb[IFA_MAX+1]; -- struct in6_addr *pfx; -- struct inet6_ifaddr *ifa; -- struct net_device *dev; -- u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; -- u8 ifa_flags; -- int err; -- -- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); -- if (err < 0) -- return err; -- -- ifm = nlmsg_data(nlh); -- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); -- if (pfx == NULL) -- return -EINVAL; -- -- if (tb[IFA_CACHEINFO]) { -- struct ifa_cacheinfo *ci; -- -- ci = nla_data(tb[IFA_CACHEINFO]); -- valid_lft = ci->ifa_valid; -- preferred_lft = ci->ifa_prefered; -- } else { -- preferred_lft = INFINITY_LIFE_TIME; -- valid_lft = INFINITY_LIFE_TIME; -- } -- ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + /* Check if address is a home address configured on any interface. */ + int ipv6_chk_home_addr(struct in6_addr *addr) + { +@@ -3017,11 +3021,15 @@ + static int + inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; + struct in6_addr *pfx; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; +@@ -3074,6 +3082,7 @@ + static int + inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; + struct in6_addr *pfx; +@@ -3083,6 +3092,9 @@ + u8 ifa_flags; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; +@@ -3103,7 +3115,7 @@ + valid_lft = INFINITY_LIFE_TIME; + } + - dev = __dev_get_by_index(ifm->ifa_index); -- if (dev == NULL) -- return -ENODEV; -- -- /* We ignore other flags so far. */ -- ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); -- -- ifa = ipv6_get_ifaddr(pfx, dev, 1); -- if (ifa == NULL) { -- /* -- * It would be best to check for !NLM_F_CREATE here but -- * userspace alreay relies on not having to provide this. -- */ -- return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, -- ifa_flags, preferred_lft, valid_lft); -- } -- -- if (nlh->nlmsg_flags & NLM_F_EXCL || -- !(nlh->nlmsg_flags & NLM_F_REPLACE)) -- err = -EEXIST; -- else -- err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); -- -- in6_ifa_put(ifa); -- -- return err; --} -- --static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, -- u8 scope, int ifindex) --{ -- struct ifaddrmsg *ifm; -- -- ifm = nlmsg_data(nlh); -- ifm->ifa_family = AF_INET6; -- ifm->ifa_prefixlen = prefixlen; -- ifm->ifa_flags = flags; -- ifm->ifa_scope = scope; -- ifm->ifa_index = ifindex; --} -- --static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, -- unsigned long tstamp, u32 preferred, u32 valid) --{ -- struct ifa_cacheinfo ci; -- -- ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 -- + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); -- ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 -- + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); -- ci.ifa_prefered = preferred; -- ci.ifa_valid = valid; -- -- return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); --} -- --static inline int rt_scope(int ifa_scope) --{ -- if (ifa_scope & IFA_HOST) -- return RT_SCOPE_HOST; -- else if (ifa_scope & IFA_LINK) -- return RT_SCOPE_LINK; -- else if (ifa_scope & IFA_SITE) -- return RT_SCOPE_SITE; -- else -- return RT_SCOPE_UNIVERSE; --} -- --static inline int inet6_ifaddr_msgsize(void) --{ -- return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) -- + nla_total_size(16) /* IFA_ADDRESS */ -- + nla_total_size(sizeof(struct ifa_cacheinfo)); --} -- --static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, -- u32 pid, u32 seq, int event, unsigned int flags) --{ -- struct nlmsghdr *nlh; -- u32 preferred, valid; -- -- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); -- if (nlh == NULL) -- return -EMSGSIZE; -- -- put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), -- ifa->idev->dev->ifindex); -- -- if (!(ifa->flags&IFA_F_PERMANENT)) { -- preferred = ifa->prefered_lft; -- valid = ifa->valid_lft; -- if (preferred != INFINITY_LIFE_TIME) { -- long tval = (jiffies - ifa->tstamp)/HZ; -- preferred -= tval; -- if (valid != INFINITY_LIFE_TIME) -- valid -= tval; -- } -- } else { -- preferred = INFINITY_LIFE_TIME; -- valid = INFINITY_LIFE_TIME; -- } -- -- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || -- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { -- nlmsg_cancel(skb, nlh); -- return -EMSGSIZE; -- } -- -- return nlmsg_end(skb, nlh); --} -- --static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, -- u32 pid, u32 seq, int event, u16 flags) --{ -- struct nlmsghdr *nlh; -- u8 scope = RT_SCOPE_UNIVERSE; -- int ifindex = ifmca->idev->dev->ifindex; -- -- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) -- scope = RT_SCOPE_SITE; -- -- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); -- if (nlh == NULL) -- return -EMSGSIZE; -- -- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); -- if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || -- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, -- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { -- nlmsg_cancel(skb, nlh); -- return -EMSGSIZE; -- } -- -- return nlmsg_end(skb, nlh); --} -- --static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, -- u32 pid, u32 seq, int event, unsigned int flags) --{ -- struct nlmsghdr *nlh; -- u8 scope = RT_SCOPE_UNIVERSE; -- int ifindex = ifaca->aca_idev->dev->ifindex; -- -- if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) -- scope = RT_SCOPE_SITE; -- -- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); -- if (nlh == NULL) -- return -EMSGSIZE; -- -- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); -- if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || -- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, -- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { -- nlmsg_cancel(skb, nlh); -- return -EMSGSIZE; -- } -- -- return nlmsg_end(skb, nlh); --} -- --enum addr_type_t --{ -- UNICAST_ADDR, -- MULTICAST_ADDR, -- ANYCAST_ADDR, --}; -- --static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, -- enum addr_type_t type) --{ -- int idx, ip_idx; -- int s_idx, s_ip_idx; -- int err = 1; -- struct net_device *dev; -- struct inet6_dev *idev = NULL; -- struct inet6_ifaddr *ifa; -- struct ifmcaddr6 *ifmca; -- struct ifacaddr6 *ifaca; -- -- s_idx = cb->args[0]; -- s_ip_idx = ip_idx = cb->args[1]; -- -- idx = 0; ++ dev = __dev_get_by_index(&init_net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + +@@ -3292,7 +3304,7 @@ + s_ip_idx = ip_idx = cb->args[1]; + + idx = 0; - for_each_netdev(dev) { -- if (idx < s_idx) -- goto cont; -- if (idx > s_idx) -- s_ip_idx = 0; -- ip_idx = 0; -- if ((idev = in6_dev_get(dev)) == NULL) -- goto cont; -- read_lock_bh(&idev->lock); -- switch (type) { -- case UNICAST_ADDR: -- /* unicast address incl. temp addr */ -- for (ifa = idev->addr_list; ifa; -- ifa = ifa->if_next, ip_idx++) { -- if (ip_idx < s_ip_idx) -- continue; -- if ((err = inet6_fill_ifaddr(skb, ifa, -- NETLINK_CB(cb->skb).pid, -- cb->nlh->nlmsg_seq, RTM_NEWADDR, -- NLM_F_MULTI)) <= 0) -- goto done; -- } -- break; -- case MULTICAST_ADDR: -- /* multicast address */ -- for (ifmca = idev->mc_list; ifmca; -- ifmca = ifmca->next, ip_idx++) { -- if (ip_idx < s_ip_idx) -- continue; -- if ((err = inet6_fill_ifmcaddr(skb, ifmca, -- NETLINK_CB(cb->skb).pid, -- cb->nlh->nlmsg_seq, RTM_GETMULTICAST, -- NLM_F_MULTI)) <= 0) -- goto done; -- } -- break; -- case ANYCAST_ADDR: -- /* anycast address */ -- for (ifaca = idev->ac_list; ifaca; -- ifaca = ifaca->aca_next, ip_idx++) { -- if (ip_idx < s_ip_idx) -- continue; -- if ((err = inet6_fill_ifacaddr(skb, ifaca, -- NETLINK_CB(cb->skb).pid, -- cb->nlh->nlmsg_seq, RTM_GETANYCAST, -- NLM_F_MULTI)) <= 0) -- goto done; -- } -- break; -- default: -- break; -- } -- read_unlock_bh(&idev->lock); -- in6_dev_put(idev); --cont: -- idx++; -- } --done: -- if (err <= 0) { -- read_unlock_bh(&idev->lock); -- in6_dev_put(idev); -- } -- cb->args[0] = idx; -- cb->args[1] = ip_idx; -- return skb->len; --} -- --static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) --{ -- enum addr_type_t type = UNICAST_ADDR; -- return inet6_dump_addr(skb, cb, type); --} -- --static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) --{ -- enum addr_type_t type = MULTICAST_ADDR; -- return inet6_dump_addr(skb, cb, type); --} -- -- --static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) --{ -- enum addr_type_t type = ANYCAST_ADDR; -- return inet6_dump_addr(skb, cb, type); --} -- --static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, -- void *arg) --{ -- struct ifaddrmsg *ifm; -- struct nlattr *tb[IFA_MAX+1]; -- struct in6_addr *addr = NULL; -- struct net_device *dev = NULL; -- struct inet6_ifaddr *ifa; -- struct sk_buff *skb; -- int err; -- -- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); -- if (err < 0) -- goto errout; -- -- addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); -- if (addr == NULL) { -- err = -EINVAL; -- goto errout; -- } -- -- ifm = nlmsg_data(nlh); -- if (ifm->ifa_index) ++ for_each_netdev(&init_net, dev) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) +@@ -3367,26 +3379,42 @@ + + static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + enum addr_type_t type = UNICAST_ADDR; ++ ++ if (net != &init_net) ++ return 0; ++ + return inet6_dump_addr(skb, cb, type); + } + + static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + enum addr_type_t type = MULTICAST_ADDR; ++ ++ if (net != &init_net) ++ return 0; ++ + return inet6_dump_addr(skb, cb, type); + } + + + static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + enum addr_type_t type = ANYCAST_ADDR; ++ ++ if (net != &init_net) ++ return 0; ++ + return inet6_dump_addr(skb, cb, type); + } + + static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, + void *arg) + { ++ struct net *net = in_skb->sk->sk_net; + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; + struct in6_addr *addr = NULL; +@@ -3395,6 +3423,9 @@ + struct sk_buff *skb; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + goto errout; +@@ -3407,7 +3438,7 @@ + + ifm = nlmsg_data(nlh); + if (ifm->ifa_index) - dev = __dev_get_by_index(ifm->ifa_index); -- -- if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { -- err = -EADDRNOTAVAIL; -- goto errout; -- } -- -- if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { -- err = -ENOBUFS; -- goto errout_ifa; -- } -- -- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, -- nlh->nlmsg_seq, RTM_NEWADDR, 0); -- if (err < 0) { -- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ -- WARN_ON(err == -EMSGSIZE); -- kfree_skb(skb); -- goto errout_ifa; -- } ++ dev = __dev_get_by_index(&init_net, ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { + err = -EADDRNOTAVAIL; +@@ -3427,7 +3458,7 @@ + kfree_skb(skb); + goto errout_ifa; + } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); --errout_ifa: -- in6_ifa_put(ifa); --errout: -- return err; --} -- --static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) --{ -- struct sk_buff *skb; -- int err = -ENOBUFS; -- -- skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); -- if (skb == NULL) -- goto errout; -- -- err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); -- if (err < 0) { -- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ -- WARN_ON(err == -EMSGSIZE); -- kfree_skb(skb); -- goto errout; -- } ++ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + errout_ifa: + in6_ifa_put(ifa); + errout: +@@ -3450,10 +3481,10 @@ + kfree_skb(skb); + goto errout; + } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); --errout: -- if (err < 0) ++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + errout: + if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); --} -- --static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, -- __s32 *array, int bytes) --{ -- BUG_ON(bytes < (DEVCONF_MAX * 4)); -- -- memset(array, 0, bytes); -- array[DEVCONF_FORWARDING] = cnf->forwarding; -- array[DEVCONF_HOPLIMIT] = cnf->hop_limit; -- array[DEVCONF_MTU6] = cnf->mtu6; -- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra; -- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects; -- array[DEVCONF_AUTOCONF] = cnf->autoconf; -- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; -- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; -- array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval; -- array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay; -- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; --#ifdef CONFIG_IPV6_PRIVACY -- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; -- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; -- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; -- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; -- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; --#endif -- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; -- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; -- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; --#ifdef CONFIG_IPV6_ROUTER_PREF -- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; -- array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; --#ifdef CONFIG_IPV6_ROUTE_INFO -- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; --#endif --#endif -- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; -- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; --#ifdef CONFIG_IPV6_OPTIMISTIC_DAD -- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; --#endif --} -- --static inline size_t inet6_if_nlmsg_size(void) --{ -- return NLMSG_ALIGN(sizeof(struct ifinfomsg)) -- + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ -- + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ -- + nla_total_size(4) /* IFLA_MTU */ -- + nla_total_size(4) /* IFLA_LINK */ -- + nla_total_size( /* IFLA_PROTINFO */ -- nla_total_size(4) /* IFLA_INET6_FLAGS */ -- + nla_total_size(sizeof(struct ifla_cacheinfo)) -- + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ -- + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ -- + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ -- ); --} -- --static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items, -- int bytes) --{ -- int i; -- int pad = bytes - sizeof(u64) * items; -- BUG_ON(pad < 0); -- -- /* Use put_unaligned() because stats may not be aligned for u64. */ -- put_unaligned(items, &stats[0]); -- for (i = 1; i < items; i++) -- put_unaligned(snmp_fold_field(mib, i), &stats[i]); -- -- memset(&stats[items], 0, pad); --} -- --static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, -- int bytes) --{ -- switch(attrtype) { -- case IFLA_INET6_STATS: -- __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); -- break; -- case IFLA_INET6_ICMP6STATS: -- __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); -- break; -- } --} -- --static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, -- u32 pid, u32 seq, int event, unsigned int flags) --{ -- struct net_device *dev = idev->dev; -- struct nlattr *nla; -- struct ifinfomsg *hdr; -- struct nlmsghdr *nlh; -- void *protoinfo; -- struct ifla_cacheinfo ci; -- -- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); -- if (nlh == NULL) -- return -EMSGSIZE; -- -- hdr = nlmsg_data(nlh); -- hdr->ifi_family = AF_INET6; -- hdr->__ifi_pad = 0; -- hdr->ifi_type = dev->type; -- hdr->ifi_index = dev->ifindex; -- hdr->ifi_flags = dev_get_flags(dev); -- hdr->ifi_change = 0; -- -- NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); -- -- if (dev->addr_len) -- NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); -- -- NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); -- if (dev->ifindex != dev->iflink) -- NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); -- -- protoinfo = nla_nest_start(skb, IFLA_PROTINFO); -- if (protoinfo == NULL) -- goto nla_put_failure; -- -- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); -- -- ci.max_reasm_len = IPV6_MAXPLEN; -- ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 -- + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); -- ci.reachable_time = idev->nd_parms->reachable_time; -- ci.retrans_time = idev->nd_parms->retrans_time; -- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); -- -- nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); -- if (nla == NULL) -- goto nla_put_failure; -- ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); -- -- /* XXX - MC not implemented */ -- -- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); -- if (nla == NULL) -- goto nla_put_failure; -- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); -- -- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); -- if (nla == NULL) -- goto nla_put_failure; -- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); -- -- nla_nest_end(skb, protoinfo); -- return nlmsg_end(skb, nlh); -- --nla_put_failure: -- nlmsg_cancel(skb, nlh); -- return -EMSGSIZE; --} -- --static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) --{ -- int idx, err; -- int s_idx = cb->args[0]; -- struct net_device *dev; -- struct inet6_dev *idev; -- -- read_lock(&dev_base_lock); -- idx = 0; ++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + } + + static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, +@@ -3612,19 +3643,22 @@ + + static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int idx, err; + int s_idx = cb->args[0]; + struct net_device *dev; + struct inet6_dev *idev; + struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; + ++ if (net != &init_net) ++ return 0; + /* FIXME: maybe disable ipv6 on non v6 guests? + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; */ + + read_lock(&dev_base_lock); + idx = 0; - for_each_netdev(dev) { -- if (idx < s_idx) -- goto cont; -- if ((idev = in6_dev_get(dev)) == NULL) -- goto cont; -- err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, -- cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); -- in6_dev_put(idev); -- if (err <= 0) -- break; --cont: -- idx++; -- } -- read_unlock(&dev_base_lock); -- cb->args[0] = idx; -- -- return skb->len; --} -- --void inet6_ifinfo_notify(int event, struct inet6_dev *idev) --{ -- struct sk_buff *skb; -- int err = -ENOBUFS; -- -- skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); -- if (skb == NULL) -- goto errout; -- -- err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); -- if (err < 0) { -- /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ -- WARN_ON(err == -EMSGSIZE); -- kfree_skb(skb); -- goto errout; -- } ++ for_each_netdev(&init_net, dev) { + if (idx < s_idx) + goto cont; + if (!v6_dev_in_nx_info(dev, nxi)) +@@ -3661,10 +3695,10 @@ + kfree_skb(skb); + goto errout; + } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); --errout: -- if (err < 0) ++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + errout: + if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); --} -- --static inline size_t inet6_prefix_nlmsg_size(void) --{ -- return NLMSG_ALIGN(sizeof(struct prefixmsg)) -- + nla_total_size(sizeof(struct in6_addr)) -- + nla_total_size(sizeof(struct prefix_cacheinfo)); --} -- --static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, -- struct prefix_info *pinfo, u32 pid, u32 seq, -- int event, unsigned int flags) --{ -- struct prefixmsg *pmsg; -- struct nlmsghdr *nlh; -- struct prefix_cacheinfo ci; -- -- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); -- if (nlh == NULL) -- return -EMSGSIZE; -- -- pmsg = nlmsg_data(nlh); -- pmsg->prefix_family = AF_INET6; -- pmsg->prefix_pad1 = 0; -- pmsg->prefix_pad2 = 0; -- pmsg->prefix_ifindex = idev->dev->ifindex; -- pmsg->prefix_len = pinfo->prefix_len; -- pmsg->prefix_type = pinfo->type; -- pmsg->prefix_pad3 = 0; -- pmsg->prefix_flags = 0; -- if (pinfo->onlink) -- pmsg->prefix_flags |= IF_PREFIX_ONLINK; -- if (pinfo->autoconf) -- pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; -- -- NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix); -- -- ci.preferred_time = ntohl(pinfo->prefered); -- ci.valid_time = ntohl(pinfo->valid); -- NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci); -- -- return nlmsg_end(skb, nlh); -- --nla_put_failure: -- nlmsg_cancel(skb, nlh); -- return -EMSGSIZE; --} -- --static void inet6_prefix_notify(int event, struct inet6_dev *idev, -- struct prefix_info *pinfo) --{ -- struct sk_buff *skb; -- int err = -ENOBUFS; -- -- skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); -- if (skb == NULL) -- goto errout; -- -- err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); -- if (err < 0) { -- /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ -- WARN_ON(err == -EMSGSIZE); -- kfree_skb(skb); -- goto errout; -- } ++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + } + + static inline size_t inet6_prefix_nlmsg_size(void) +@@ -3730,10 +3764,10 @@ + kfree_skb(skb); + goto errout; + } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); --errout: -- if (err < 0) ++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + errout: + if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); --} -- --static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) --{ -- inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); -- -- switch (event) { -- case RTM_NEWADDR: -- /* -- * If the address was optimistic -- * we inserted the route at the start of -- * our DAD process, so we don't need -- * to do it again -- */ -- if (!(ifp->rt->rt6i_node)) -- ip6_ins_rt(ifp->rt); -- if (ifp->idev->cnf.forwarding) -- addrconf_join_anycast(ifp); -- break; -- case RTM_DELADDR: -- if (ifp->idev->cnf.forwarding) -- addrconf_leave_anycast(ifp); -- addrconf_leave_solict(ifp->idev, &ifp->addr); -- dst_hold(&ifp->rt->u.dst); -- if (ip6_del_rt(ifp->rt)) -- dst_free(&ifp->rt->u.dst); -- break; -- } --} -- --static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) --{ -- rcu_read_lock_bh(); -- if (likely(ifp->idev->dead == 0)) -- __ipv6_ifa_notify(event, ifp); -- rcu_read_unlock_bh(); --} -- --#ifdef CONFIG_SYSCTL -- --static --int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, -- void __user *buffer, size_t *lenp, loff_t *ppos) --{ -- int *valp = ctl->data; -- int val = *valp; -- int ret; -- -- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); -- -- if (write && valp != &ipv6_devconf_dflt.forwarding) { -- if (valp != &ipv6_devconf.forwarding) { -- if ((!*valp) ^ (!val)) { -- struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; -- if (idev == NULL) -- return ret; -- dev_forward_change(idev); -- } -- } else { -- ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; -- addrconf_forward_change(); -- } -- if (*valp) -- rt6_purge_dflt_routers(); -- } -- -- return ret; --} -- --static int addrconf_sysctl_forward_strategy(ctl_table *table, -- int __user *name, int nlen, -- void __user *oldval, -- size_t __user *oldlenp, -- void __user *newval, size_t newlen) --{ -- int *valp = table->data; -- int new; -- -- if (!newval || !newlen) -- return 0; -- if (newlen != sizeof(int)) -- return -EINVAL; -- if (get_user(new, (int __user *)newval)) -- return -EFAULT; -- if (new == *valp) -- return 0; -- if (oldval && oldlenp) { -- size_t len; -- if (get_user(len, oldlenp)) -- return -EFAULT; -- if (len) { -- if (len > table->maxlen) -- len = table->maxlen; -- if (copy_to_user(oldval, valp, len)) -- return -EFAULT; -- if (put_user(len, oldlenp)) -- return -EFAULT; -- } -- } -- -- if (valp != &ipv6_devconf_dflt.forwarding) { -- if (valp != &ipv6_devconf.forwarding) { -- struct inet6_dev *idev = (struct inet6_dev *)table->extra1; -- int changed; -- if (unlikely(idev == NULL)) -- return -ENODEV; -- changed = (!*valp) ^ (!new); -- *valp = new; -- if (changed) -- dev_forward_change(idev); -- } else { -- *valp = new; -- addrconf_forward_change(); -- } -- -- if (*valp) -- rt6_purge_dflt_routers(); -- } else -- *valp = new; -- -- return 1; --} -- --static struct addrconf_sysctl_table --{ -- struct ctl_table_header *sysctl_header; -- ctl_table addrconf_vars[__NET_IPV6_MAX]; -- ctl_table addrconf_dev[2]; -- ctl_table addrconf_conf_dir[2]; -- ctl_table addrconf_proto_dir[2]; -- ctl_table addrconf_root_dir[2]; --} addrconf_sysctl __read_mostly = { -- .sysctl_header = NULL, -- .addrconf_vars = { -- { -- .ctl_name = NET_IPV6_FORWARDING, -- .procname = "forwarding", -- .data = &ipv6_devconf.forwarding, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &addrconf_sysctl_forward, -- .strategy = &addrconf_sysctl_forward_strategy, -- }, -- { -- .ctl_name = NET_IPV6_HOP_LIMIT, -- .procname = "hop_limit", -- .data = &ipv6_devconf.hop_limit, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_MTU, -- .procname = "mtu", -- .data = &ipv6_devconf.mtu6, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_ACCEPT_RA, -- .procname = "accept_ra", -- .data = &ipv6_devconf.accept_ra, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, -- .procname = "accept_redirects", -- .data = &ipv6_devconf.accept_redirects, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_AUTOCONF, -- .procname = "autoconf", -- .data = &ipv6_devconf.autoconf, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_DAD_TRANSMITS, -- .procname = "dad_transmits", -- .data = &ipv6_devconf.dad_transmits, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_RTR_SOLICITS, -- .procname = "router_solicitations", -- .data = &ipv6_devconf.rtr_solicits, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, -- .procname = "router_solicitation_interval", -- .data = &ipv6_devconf.rtr_solicit_interval, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec_jiffies, -- .strategy = &sysctl_jiffies, -- }, -- { -- .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, -- .procname = "router_solicitation_delay", -- .data = &ipv6_devconf.rtr_solicit_delay, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec_jiffies, -- .strategy = &sysctl_jiffies, -- }, -- { -- .ctl_name = NET_IPV6_FORCE_MLD_VERSION, -- .procname = "force_mld_version", -- .data = &ipv6_devconf.force_mld_version, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, --#ifdef CONFIG_IPV6_PRIVACY -- { -- .ctl_name = NET_IPV6_USE_TEMPADDR, -- .procname = "use_tempaddr", -- .data = &ipv6_devconf.use_tempaddr, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_TEMP_VALID_LFT, -- .procname = "temp_valid_lft", -- .data = &ipv6_devconf.temp_valid_lft, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, -- .procname = "temp_prefered_lft", -- .data = &ipv6_devconf.temp_prefered_lft, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_REGEN_MAX_RETRY, -- .procname = "regen_max_retry", -- .data = &ipv6_devconf.regen_max_retry, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, -- .procname = "max_desync_factor", -- .data = &ipv6_devconf.max_desync_factor, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, --#endif -- { -- .ctl_name = NET_IPV6_MAX_ADDRESSES, -- .procname = "max_addresses", -- .data = &ipv6_devconf.max_addresses, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, -- .procname = "accept_ra_defrtr", -- .data = &ipv6_devconf.accept_ra_defrtr, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, -- .procname = "accept_ra_pinfo", -- .data = &ipv6_devconf.accept_ra_pinfo, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, --#ifdef CONFIG_IPV6_ROUTER_PREF -- { -- .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF, -- .procname = "accept_ra_rtr_pref", -- .data = &ipv6_devconf.accept_ra_rtr_pref, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, -- .procname = "router_probe_interval", -- .data = &ipv6_devconf.rtr_probe_interval, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec_jiffies, -- .strategy = &sysctl_jiffies, -- }, --#ifdef CONFIG_IPV6_ROUTE_INFO -- { -- .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, -- .procname = "accept_ra_rt_info_max_plen", -- .data = &ipv6_devconf.accept_ra_rt_info_max_plen, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, --#endif --#endif -- { -- .ctl_name = NET_IPV6_PROXY_NDP, -- .procname = "proxy_ndp", -- .data = &ipv6_devconf.proxy_ndp, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, -- { -- .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, -- .procname = "accept_source_route", -- .data = &ipv6_devconf.accept_source_route, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- }, --#ifdef CONFIG_IPV6_OPTIMISTIC_DAD -- { -- .ctl_name = CTL_UNNUMBERED, -- .procname = "optimistic_dad", -- .data = &ipv6_devconf.optimistic_dad, -- .maxlen = sizeof(int), -- .mode = 0644, -- .proc_handler = &proc_dointvec, -- -- }, --#endif -- { -- .ctl_name = 0, /* sentinel */ -- } -- }, -- .addrconf_dev = { -- { -- .ctl_name = NET_PROTO_CONF_ALL, -- .procname = "all", -- .mode = 0555, -- .child = addrconf_sysctl.addrconf_vars, -- }, -- { -- .ctl_name = 0, /* sentinel */ -- } -- }, -- .addrconf_conf_dir = { -- { -- .ctl_name = NET_IPV6_CONF, -- .procname = "conf", -- .mode = 0555, -- .child = addrconf_sysctl.addrconf_dev, -- }, -- { -- .ctl_name = 0, /* sentinel */ -- } -- }, -- .addrconf_proto_dir = { -- { -- .ctl_name = NET_IPV6, -- .procname = "ipv6", -- .mode = 0555, -- .child = addrconf_sysctl.addrconf_conf_dir, -- }, -- { -- .ctl_name = 0, /* sentinel */ -- } -- }, -- .addrconf_root_dir = { -- { -- .ctl_name = CTL_NET, -- .procname = "net", -- .mode = 0555, -- .child = addrconf_sysctl.addrconf_proto_dir, -- }, -- { -- .ctl_name = 0, /* sentinel */ -- } -- }, --}; -- --static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) --{ -- int i; -- struct net_device *dev = idev ? idev->dev : NULL; -- struct addrconf_sysctl_table *t; -- char *dev_name = NULL; -- -- t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); -- if (t == NULL) -- return; -- for (i=0; t->addrconf_vars[i].data; i++) { -- t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; -- t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ -- } -- if (dev) { -- dev_name = dev->name; -- t->addrconf_dev[0].ctl_name = dev->ifindex; -- } else { -- dev_name = "default"; -- t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; -- } -- -- /* -- * Make a copy of dev_name, because '.procname' is regarded as const -- * by sysctl and we wouldn't want anyone to change it under our feet -- * (see SIOCSIFNAME). -- */ -- dev_name = kstrdup(dev_name, GFP_KERNEL); -- if (!dev_name) -- goto free; -- -- t->addrconf_dev[0].procname = dev_name; -- -- t->addrconf_dev[0].child = t->addrconf_vars; -- t->addrconf_conf_dir[0].child = t->addrconf_dev; -- t->addrconf_proto_dir[0].child = t->addrconf_conf_dir; -- t->addrconf_root_dir[0].child = t->addrconf_proto_dir; -- -- t->sysctl_header = register_sysctl_table(t->addrconf_root_dir); -- if (t->sysctl_header == NULL) -- goto free_procname; -- else -- p->sysctl = t; -- return; -- -- /* error path */ -- free_procname: -- kfree(dev_name); -- free: -- kfree(t); -- -- return; --} -- --static void addrconf_sysctl_unregister(struct ipv6_devconf *p) --{ -- if (p->sysctl) { -- struct addrconf_sysctl_table *t = p->sysctl; -- p->sysctl = NULL; -- unregister_sysctl_table(t->sysctl_header); -- kfree(t->addrconf_dev[0].procname); -- kfree(t); -- } --} -- -- --#endif -- --/* -- * Device notifier -- */ -- --int register_inet6addr_notifier(struct notifier_block *nb) --{ -- return atomic_notifier_chain_register(&inet6addr_chain, nb); --} -- --EXPORT_SYMBOL(register_inet6addr_notifier); -- --int unregister_inet6addr_notifier(struct notifier_block *nb) --{ -- return atomic_notifier_chain_unregister(&inet6addr_chain,nb); --} -- --EXPORT_SYMBOL(unregister_inet6addr_notifier); -- --/* -- * Init / cleanup code -- */ -- --int __init addrconf_init(void) --{ -- int err = 0; -- -- /* The addrconf netdev notifier requires that loopback_dev -- * has it's ipv6 private information allocated and setup -- * before it can bring up and give link-local addresses -- * to other devices which are up. -- * -- * Unfortunately, loopback_dev is not necessarily the first -- * entry in the global dev_base list of net devices. In fact, -- * it is likely to be the very last entry on that list. -- * So this causes the notifier registry below to try and -- * give link-local addresses to all devices besides loopback_dev -- * first, then loopback_dev, which cases all the non-loopback_dev -- * devices to fail to get a link-local address. -- * -- * So, as a temporary fix, allocate the ipv6 structure for -- * loopback_dev first by hand. -- * Longer term, all of the dependencies ipv6 has upon the loopback -- * device and it being up should be removed. -- */ -- rtnl_lock(); ++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); + } + + static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +@@ -4244,16 +4278,16 @@ + * device and it being up should be removed. + */ + rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) -- err = -ENOMEM; -- rtnl_unlock(); -- if (err) -- return err; -- ++ if (!ipv6_add_dev(&init_net.loopback_dev)) + err = -ENOMEM; + rtnl_unlock(); + if (err) + return err; + - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); --#ifdef CONFIG_IPV6_MULTIPLE_TABLES ++ ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); --#endif -- -- register_netdevice_notifier(&ipv6_dev_notf); -- -- addrconf_verify(0); -- -- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); -- if (err < 0) -- goto errout; -- -- /* Only the first call to __rtnl_register can fail */ -- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL); -- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL); -- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr); -- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); -- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); -- --#ifdef CONFIG_SYSCTL -- addrconf_sysctl.sysctl_header = -- register_sysctl_table(addrconf_sysctl.addrconf_root_dir); -- addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); --#endif -- -- return 0; --errout: -- unregister_netdevice_notifier(&ipv6_dev_notf); -- -- return err; --} -- --void __exit addrconf_cleanup(void) --{ -- struct net_device *dev; -- struct inet6_dev *idev; -- struct inet6_ifaddr *ifa; -- int i; -- -- unregister_netdevice_notifier(&ipv6_dev_notf); -- --#ifdef CONFIG_SYSCTL -- addrconf_sysctl_unregister(&ipv6_devconf_dflt); -- addrconf_sysctl_unregister(&ipv6_devconf); --#endif -- -- rtnl_lock(); -- -- /* -- * clean dev list. -- */ -- ++ ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); ++ ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); + #endif + + register_netdevice_notifier(&ipv6_dev_notf); +@@ -4304,12 +4338,12 @@ + * clean dev list. + */ + - for_each_netdev(dev) { -- if ((idev = __in6_dev_get(dev)) == NULL) -- continue; -- addrconf_ifdown(dev, 1); -- } ++ for_each_netdev(&init_net, dev) { + if ((idev = __in6_dev_get(dev)) == NULL) + continue; + addrconf_ifdown(dev, 1); + } - addrconf_ifdown(&loopback_dev, 2); -- -- /* -- * Check hash table. -- */ -- -- write_lock_bh(&addrconf_hash_lock); -- for (i=0; i < IN6_ADDR_HSIZE; i++) { -- for (ifa=inet6_addr_lst[i]; ifa; ) { -- struct inet6_ifaddr *bifa; -- -- bifa = ifa; -- ifa = ifa->lst_next; -- printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa); -- /* Do not free it; something is wrong. -- Now we can investigate it with debugger. -- */ -- } -- } -- write_unlock_bh(&addrconf_hash_lock); -- -- del_timer(&addr_chk_timer); -- -- rtnl_unlock(); -- --#ifdef CONFIG_PROC_FS ++ addrconf_ifdown(&init_net.loopback_dev, 2); + + /* + * Check hash table. +@@ -4335,6 +4369,6 @@ + rtnl_unlock(); + + #ifdef CONFIG_PROC_FS - proc_net_remove("if_inet6"); --#endif --} ++ proc_net_remove(&init_net, "if_inet6"); + #endif + } diff -Nurb linux-2.6.22-570/net/ipv6/af_inet6.c linux-2.6.22-590/net/ipv6/af_inet6.c --- linux-2.6.22-570/net/ipv6/af_inet6.c 2008-03-20 13:25:46.000000000 -0400 +++ linux-2.6.22-590/net/ipv6/af_inet6.c 2008-03-20 13:28:03.000000000 -0400 @@ -194183,294 +183123,6 @@ diff -Nurb linux-2.6.22-570/net/netfilter/xt_MARK.c linux-2.6.22-590/net/netfilt } */ connection_sk = (*pskb)->sk; -diff -Nurb linux-2.6.22-570/net/netfilter/xt_MARK.c.orig linux-2.6.22-590/net/netfilter/xt_MARK.c.orig ---- linux-2.6.22-570/net/netfilter/xt_MARK.c.orig 2008-03-20 13:25:49.000000000 -0400 -+++ linux-2.6.22-590/net/netfilter/xt_MARK.c.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,284 +0,0 @@ --/* This is a module which is used for setting the NFMARK field of an skb. */ -- --/* (C) 1999-2001 Marc Boucher -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License version 2 as -- * published by the Free Software Foundation. -- * -- */ -- --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include -- --MODULE_LICENSE("GPL"); --MODULE_AUTHOR("Marc Boucher "); --MODULE_DESCRIPTION("ip[6]tables MARK modification module"); --MODULE_ALIAS("ipt_MARK"); --MODULE_ALIAS("ip6t_MARK"); -- --static inline u_int16_t --get_dst_port(struct nf_conntrack_tuple *tuple) --{ -- switch (tuple->dst.protonum) { -- case IPPROTO_GRE: -- /* XXX Truncate 32-bit GRE key to 16 bits */ --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11) -- return tuple->dst.u.gre.key; --#else -- return htons(ntohl(tuple->dst.u.gre.key)); --#endif -- case IPPROTO_ICMP: -- /* Bind on ICMP echo ID */ -- return tuple->src.u.icmp.id; -- case IPPROTO_TCP: -- return tuple->dst.u.tcp.port; -- case IPPROTO_UDP: -- return tuple->dst.u.udp.port; -- default: -- return tuple->dst.u.all; -- } --} -- --static inline u_int16_t --get_src_port(struct nf_conntrack_tuple *tuple) --{ -- switch (tuple->dst.protonum) { -- case IPPROTO_GRE: -- /* XXX Truncate 32-bit GRE key to 16 bits */ -- return htons(ntohl(tuple->src.u.gre.key)); -- case IPPROTO_ICMP: -- /* Bind on ICMP echo ID */ -- return tuple->src.u.icmp.id; -- case IPPROTO_TCP: -- return tuple->src.u.tcp.port; -- case IPPROTO_UDP: -- return tuple->src.u.udp.port; -- default: -- return tuple->src.u.all; -- } --} -- --static unsigned int --target_v0(struct sk_buff **pskb, -- const struct net_device *in, -- const struct net_device *out, -- unsigned int hooknum, -- const struct xt_target *target, -- const void *targinfo) --{ -- const struct xt_mark_target_info *markinfo = targinfo; -- -- (*pskb)->mark = markinfo->mark; -- return XT_CONTINUE; --} -- --static unsigned int --target_v1(struct sk_buff **pskb, -- const struct net_device *in, -- const struct net_device *out, -- unsigned int hooknum, -- const struct xt_target *target, -- const void *targinfo) --{ -- const struct xt_mark_target_info_v1 *markinfo = targinfo; -- int mark = -1; -- -- switch (markinfo->mode) { -- case XT_MARK_SET: -- mark = markinfo->mark; -- break; -- -- case XT_MARK_AND: -- mark = (*pskb)->mark & markinfo->mark; -- break; -- -- case XT_MARK_OR: -- mark = (*pskb)->mark | markinfo->mark; -- break; -- -- case XT_MARK_COPYXID: { -- enum ip_conntrack_info ctinfo; -- struct sock *connection_sk=NULL; -- int dif; -- -- struct nf_conn *ct = nf_ct_get((*pskb), &ctinfo); -- extern struct inet_hashinfo tcp_hashinfo; -- enum ip_conntrack_dir dir; -- if (!ct) -- break; -- -- dir = CTINFO2DIR(ctinfo); -- u_int32_t src_ip = ct->tuplehash[dir].tuple.src.u3.ip; -- u_int16_t src_port = get_src_port(&ct->tuplehash[dir].tuple); -- u_int16_t proto = ct->tuplehash[dir].tuple.dst.protonum; -- -- u_int32_t ip; -- u_int16_t port; -- -- dif = ((struct rtable *)(*pskb)->dst)->rt_iif; -- ip = ct->tuplehash[dir].tuple.dst.u3.ip; -- port = get_dst_port(&ct->tuplehash[dir].tuple); -- -- if (proto == 1) { -- if (((*pskb)->mark!=-1) && (*pskb)->mark) -- ct->xid[0]=(*pskb)->mark; -- if (ct->xid[0]) -- mark = ct->xid[0]; -- printk(KERN_CRIT "%d %d\n",ct->xid[0],(*pskb)->mark); -- -- } -- else if (proto == 6) { -- if ((*pskb)->sk) -- connection_sk = (*pskb)->sk; -- else { -- connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif); -- } -- -- if (connection_sk) { -- connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir]; -- ct->xid[!dir]=connection_sk->sk_xid; -- if (connection_sk->sk_xid != 0) -- mark = connection_sk->sk_xid; -- if (connection_sk != (*pskb)->sk) -- sock_put(connection_sk); -- } -- break; -- } -- } -- } -- -- if (mark != -1) -- (*pskb)->mark = mark; -- return XT_CONTINUE; --} -- -- --static int --checkentry_v0(const char *tablename, -- const void *entry, -- const struct xt_target *target, -- void *targinfo, -- unsigned int hook_mask) --{ -- struct xt_mark_target_info *markinfo = targinfo; -- -- if (markinfo->mark > 0xffffffff) { -- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); -- return 0; -- } -- return 1; --} -- --static int --checkentry_v1(const char *tablename, -- const void *entry, -- const struct xt_target *target, -- void *targinfo, -- unsigned int hook_mask) --{ -- struct xt_mark_target_info_v1 *markinfo = targinfo; -- -- if (markinfo->mode != XT_MARK_SET -- && markinfo->mode != XT_MARK_AND -- && markinfo->mode != XT_MARK_OR -- && markinfo->mode != XT_MARK_COPYXID) { -- printk(KERN_WARNING "MARK: unknown mode %u\n", -- markinfo->mode); -- return 0; -- } -- if (markinfo->mark > 0xffffffff) { -- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); -- return 0; -- } -- return 1; --} -- --#ifdef CONFIG_COMPAT --struct compat_xt_mark_target_info_v1 { -- compat_ulong_t mark; -- u_int8_t mode; -- u_int8_t __pad1; -- u_int16_t __pad2; --}; -- --static void compat_from_user_v1(void *dst, void *src) --{ -- struct compat_xt_mark_target_info_v1 *cm = src; -- struct xt_mark_target_info_v1 m = { -- .mark = cm->mark, -- .mode = cm->mode, -- }; -- memcpy(dst, &m, sizeof(m)); --} -- --static int compat_to_user_v1(void __user *dst, void *src) --{ -- struct xt_mark_target_info_v1 *m = src; -- struct compat_xt_mark_target_info_v1 cm = { -- .mark = m->mark, -- .mode = m->mode, -- }; -- return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; --} --#endif /* CONFIG_COMPAT */ -- --static struct xt_target xt_mark_target[] = { -- { -- .name = "MARK", -- .family = AF_INET, -- .revision = 0, -- .checkentry = checkentry_v0, -- .target = target_v0, -- .targetsize = sizeof(struct xt_mark_target_info), -- .table = "mangle", -- .me = THIS_MODULE, -- }, -- { -- .name = "MARK", -- .family = AF_INET, -- .revision = 1, -- .checkentry = checkentry_v1, -- .target = target_v1, -- .targetsize = sizeof(struct xt_mark_target_info_v1), --#ifdef CONFIG_COMPAT -- .compatsize = sizeof(struct compat_xt_mark_target_info_v1), -- .compat_from_user = compat_from_user_v1, -- .compat_to_user = compat_to_user_v1, --#endif -- .table = "mangle", -- .me = THIS_MODULE, -- }, -- { -- .name = "MARK", -- .family = AF_INET6, -- .revision = 0, -- .checkentry = checkentry_v0, -- .target = target_v0, -- .targetsize = sizeof(struct xt_mark_target_info), -- .table = "mangle", -- .me = THIS_MODULE, -- }, --}; -- --static int __init xt_mark_init(void) --{ -- return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); --} -- --static void __exit xt_mark_fini(void) --{ -- xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); --} -- --module_init(xt_mark_init); --module_exit(xt_mark_fini); diff -Nurb linux-2.6.22-570/net/netfilter/xt_hashlimit.c linux-2.6.22-590/net/netfilter/xt_hashlimit.c --- linux-2.6.22-570/net/netfilter/xt_hashlimit.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-590/net/netfilter/xt_hashlimit.c 2008-03-20 13:28:08.000000000 -0400 @@ -196423,2354 +185075,6 @@ diff -Nurb linux-2.6.22-570/net/socket.c linux-2.6.22-590/net/socket.c return 0; } -diff -Nurb linux-2.6.22-570/net/socket.c.orig linux-2.6.22-590/net/socket.c.orig ---- linux-2.6.22-570/net/socket.c.orig 2008-03-20 13:25:40.000000000 -0400 -+++ linux-2.6.22-590/net/socket.c.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,2344 +0,0 @@ --/* -- * NET An implementation of the SOCKET network access protocol. -- * -- * Version: @(#)socket.c 1.1.93 18/02/95 -- * -- * Authors: Orest Zborowski, -- * Ross Biro -- * Fred N. van Kempen, -- * -- * Fixes: -- * Anonymous : NOTSOCK/BADF cleanup. Error fix in -- * shutdown() -- * Alan Cox : verify_area() fixes -- * Alan Cox : Removed DDI -- * Jonathan Kamens : SOCK_DGRAM reconnect bug -- * Alan Cox : Moved a load of checks to the very -- * top level. -- * Alan Cox : Move address structures to/from user -- * mode above the protocol layers. -- * Rob Janssen : Allow 0 length sends. -- * Alan Cox : Asynchronous I/O support (cribbed from the -- * tty drivers). -- * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) -- * Jeff Uphoff : Made max number of sockets command-line -- * configurable. -- * Matti Aarnio : Made the number of sockets dynamic, -- * to be allocated when needed, and mr. -- * Uphoff's max is used as max to be -- * allowed to allocate. -- * Linus : Argh. removed all the socket allocation -- * altogether: it's in the inode now. -- * Alan Cox : Made sock_alloc()/sock_release() public -- * for NetROM and future kernel nfsd type -- * stuff. -- * Alan Cox : sendmsg/recvmsg basics. -- * Tom Dyas : Export net symbols. -- * Marcin Dalecki : Fixed problems with CONFIG_NET="n". -- * Alan Cox : Added thread locking to sys_* calls -- * for sockets. May have errors at the -- * moment. -- * Kevin Buhr : Fixed the dumb errors in the above. -- * Andi Kleen : Some small cleanups, optimizations, -- * and fixed a copy_from_user() bug. -- * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) -- * Tigran Aivazian : Made listen(2) backlog sanity checks -- * protocol-independent -- * -- * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public License -- * as published by the Free Software Foundation; either version -- * 2 of the License, or (at your option) any later version. -- * -- * -- * This module is effectively the top level interface to the BSD socket -- * paradigm. -- * -- * Based upon Swansea University Computer Society NET3.039 -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include -- --#include -- --#include --#include -- --static int sock_no_open(struct inode *irrelevant, struct file *dontcare); --static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, -- unsigned long nr_segs, loff_t pos); --static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, -- unsigned long nr_segs, loff_t pos); --static int sock_mmap(struct file *file, struct vm_area_struct *vma); -- --static int sock_close(struct inode *inode, struct file *file); --static unsigned int sock_poll(struct file *file, -- struct poll_table_struct *wait); --static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); --#ifdef CONFIG_COMPAT --static long compat_sock_ioctl(struct file *file, -- unsigned int cmd, unsigned long arg); --#endif --static int sock_fasync(int fd, struct file *filp, int on); --static ssize_t sock_sendpage(struct file *file, struct page *page, -- int offset, size_t size, loff_t *ppos, int more); -- --/* -- * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear -- * in the operation structures but are done directly via the socketcall() multiplexor. -- */ -- --static const struct file_operations socket_file_ops = { -- .owner = THIS_MODULE, -- .llseek = no_llseek, -- .aio_read = sock_aio_read, -- .aio_write = sock_aio_write, -- .poll = sock_poll, -- .unlocked_ioctl = sock_ioctl, --#ifdef CONFIG_COMPAT -- .compat_ioctl = compat_sock_ioctl, --#endif -- .mmap = sock_mmap, -- .open = sock_no_open, /* special open code to disallow open via /proc */ -- .release = sock_close, -- .fasync = sock_fasync, -- .sendpage = sock_sendpage, -- .splice_write = generic_splice_sendpage, --}; -- --/* -- * The protocol list. Each protocol is registered in here. -- */ -- --static DEFINE_SPINLOCK(net_family_lock); --static const struct net_proto_family *net_families[NPROTO] __read_mostly; -- --/* -- * Statistics counters of the socket lists -- */ -- --static DEFINE_PER_CPU(int, sockets_in_use) = 0; -- --/* -- * Support routines. -- * Move socket addresses back and forth across the kernel/user -- * divide and look after the messy bits. -- */ -- --#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - -- 16 for IP, 16 for IPX, -- 24 for IPv6, -- about 80 for AX.25 -- must be at least one bigger than -- the AF_UNIX size (see net/unix/af_unix.c -- :unix_mkname()). -- */ -- --/** -- * move_addr_to_kernel - copy a socket address into kernel space -- * @uaddr: Address in user space -- * @kaddr: Address in kernel space -- * @ulen: Length in user space -- * -- * The address is copied into kernel space. If the provided address is -- * too long an error code of -EINVAL is returned. If the copy gives -- * invalid addresses -EFAULT is returned. On a success 0 is returned. -- */ -- --int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) --{ -- if (ulen < 0 || ulen > MAX_SOCK_ADDR) -- return -EINVAL; -- if (ulen == 0) -- return 0; -- if (copy_from_user(kaddr, uaddr, ulen)) -- return -EFAULT; -- return audit_sockaddr(ulen, kaddr); --} -- --/** -- * move_addr_to_user - copy an address to user space -- * @kaddr: kernel space address -- * @klen: length of address in kernel -- * @uaddr: user space address -- * @ulen: pointer to user length field -- * -- * The value pointed to by ulen on entry is the buffer length available. -- * This is overwritten with the buffer space used. -EINVAL is returned -- * if an overlong buffer is specified or a negative buffer size. -EFAULT -- * is returned if either the buffer or the length field are not -- * accessible. -- * After copying the data up to the limit the user specifies, the true -- * length of the data is written over the length limit the user -- * specified. Zero is returned for a success. -- */ -- --int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, -- int __user *ulen) --{ -- int err; -- int len; -- -- err = get_user(len, ulen); -- if (err) -- return err; -- if (len > klen) -- len = klen; -- if (len < 0 || len > MAX_SOCK_ADDR) -- return -EINVAL; -- if (len) { -- if (audit_sockaddr(klen, kaddr)) -- return -ENOMEM; -- if (copy_to_user(uaddr, kaddr, len)) -- return -EFAULT; -- } -- /* -- * "fromlen shall refer to the value before truncation.." -- * 1003.1g -- */ -- return __put_user(klen, ulen); --} -- --#define SOCKFS_MAGIC 0x534F434B -- --static struct kmem_cache *sock_inode_cachep __read_mostly; -- --static struct inode *sock_alloc_inode(struct super_block *sb) --{ -- struct socket_alloc *ei; -- -- ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); -- if (!ei) -- return NULL; -- init_waitqueue_head(&ei->socket.wait); -- -- ei->socket.fasync_list = NULL; -- ei->socket.state = SS_UNCONNECTED; -- ei->socket.flags = 0; -- ei->socket.ops = NULL; -- ei->socket.sk = NULL; -- ei->socket.file = NULL; -- -- return &ei->vfs_inode; --} -- --static void sock_destroy_inode(struct inode *inode) --{ -- kmem_cache_free(sock_inode_cachep, -- container_of(inode, struct socket_alloc, vfs_inode)); --} -- --static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) --{ -- struct socket_alloc *ei = (struct socket_alloc *)foo; -- -- inode_init_once(&ei->vfs_inode); --} -- --static int init_inodecache(void) --{ -- sock_inode_cachep = kmem_cache_create("sock_inode_cache", -- sizeof(struct socket_alloc), -- 0, -- (SLAB_HWCACHE_ALIGN | -- SLAB_RECLAIM_ACCOUNT | -- SLAB_MEM_SPREAD), -- init_once, -- NULL); -- if (sock_inode_cachep == NULL) -- return -ENOMEM; -- return 0; --} -- --static struct super_operations sockfs_ops = { -- .alloc_inode = sock_alloc_inode, -- .destroy_inode =sock_destroy_inode, -- .statfs = simple_statfs, --}; -- --static int sockfs_get_sb(struct file_system_type *fs_type, -- int flags, const char *dev_name, void *data, -- struct vfsmount *mnt) --{ -- return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, -- mnt); --} -- --static struct vfsmount *sock_mnt __read_mostly; -- --static struct file_system_type sock_fs_type = { -- .name = "sockfs", -- .get_sb = sockfs_get_sb, -- .kill_sb = kill_anon_super, --}; -- --static int sockfs_delete_dentry(struct dentry *dentry) --{ -- /* -- * At creation time, we pretended this dentry was hashed -- * (by clearing DCACHE_UNHASHED bit in d_flags) -- * At delete time, we restore the truth : not hashed. -- * (so that dput() can proceed correctly) -- */ -- dentry->d_flags |= DCACHE_UNHASHED; -- return 0; --} -- --/* -- * sockfs_dname() is called from d_path(). -- */ --static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) --{ -- return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", -- dentry->d_inode->i_ino); --} -- --static struct dentry_operations sockfs_dentry_operations = { -- .d_delete = sockfs_delete_dentry, -- .d_dname = sockfs_dname, --}; -- --/* -- * Obtains the first available file descriptor and sets it up for use. -- * -- * These functions create file structures and maps them to fd space -- * of the current process. On success it returns file descriptor -- * and file struct implicitly stored in sock->file. -- * Note that another thread may close file descriptor before we return -- * from this function. We use the fact that now we do not refer -- * to socket after mapping. If one day we will need it, this -- * function will increment ref. count on file by 1. -- * -- * In any case returned fd MAY BE not valid! -- * This race condition is unavoidable -- * with shared fd spaces, we cannot solve it inside kernel, -- * but we take care of internal coherence yet. -- */ -- --static int sock_alloc_fd(struct file **filep) --{ -- int fd; -- -- fd = get_unused_fd(); -- if (likely(fd >= 0)) { -- struct file *file = get_empty_filp(); -- -- *filep = file; -- if (unlikely(!file)) { -- put_unused_fd(fd); -- return -ENFILE; -- } -- } else -- *filep = NULL; -- return fd; --} -- --static int sock_attach_fd(struct socket *sock, struct file *file) --{ -- struct qstr name = { .name = "" }; -- -- file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); -- if (unlikely(!file->f_path.dentry)) -- return -ENOMEM; -- -- file->f_path.dentry->d_op = &sockfs_dentry_operations; -- /* -- * We dont want to push this dentry into global dentry hash table. -- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED -- * This permits a working /proc/$pid/fd/XXX on sockets -- */ -- file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; -- d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); -- file->f_path.mnt = mntget(sock_mnt); -- file->f_mapping = file->f_path.dentry->d_inode->i_mapping; -- -- sock->file = file; -- file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; -- file->f_mode = FMODE_READ | FMODE_WRITE; -- file->f_flags = O_RDWR; -- file->f_pos = 0; -- file->private_data = sock; -- -- return 0; --} -- --int sock_map_fd(struct socket *sock) --{ -- struct file *newfile; -- int fd = sock_alloc_fd(&newfile); -- -- if (likely(fd >= 0)) { -- int err = sock_attach_fd(sock, newfile); -- -- if (unlikely(err < 0)) { -- put_filp(newfile); -- put_unused_fd(fd); -- return err; -- } -- fd_install(fd, newfile); -- } -- return fd; --} -- --static struct socket *sock_from_file(struct file *file, int *err) --{ -- if (file->f_op == &socket_file_ops) -- return file->private_data; /* set in sock_map_fd */ -- -- *err = -ENOTSOCK; -- return NULL; --} -- --/** -- * sockfd_lookup - Go from a file number to its socket slot -- * @fd: file handle -- * @err: pointer to an error code return -- * -- * The file handle passed in is locked and the socket it is bound -- * too is returned. If an error occurs the err pointer is overwritten -- * with a negative errno code and NULL is returned. The function checks -- * for both invalid handles and passing a handle which is not a socket. -- * -- * On a success the socket object pointer is returned. -- */ -- --struct socket *sockfd_lookup(int fd, int *err) --{ -- struct file *file; -- struct socket *sock; -- -- file = fget(fd); -- if (!file) { -- *err = -EBADF; -- return NULL; -- } -- -- sock = sock_from_file(file, err); -- if (!sock) -- fput(file); -- return sock; --} -- --static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) --{ -- struct file *file; -- struct socket *sock; -- -- *err = -EBADF; -- file = fget_light(fd, fput_needed); -- if (file) { -- sock = sock_from_file(file, err); -- if (sock) -- return sock; -- fput_light(file, *fput_needed); -- } -- return NULL; --} -- --/** -- * sock_alloc - allocate a socket -- * -- * Allocate a new inode and socket object. The two are bound together -- * and initialised. The socket is then returned. If we are out of inodes -- * NULL is returned. -- */ -- --static struct socket *sock_alloc(void) --{ -- struct inode *inode; -- struct socket *sock; -- -- inode = new_inode(sock_mnt->mnt_sb); -- if (!inode) -- return NULL; -- -- sock = SOCKET_I(inode); -- -- inode->i_mode = S_IFSOCK | S_IRWXUGO; -- inode->i_uid = current->fsuid; -- inode->i_gid = current->fsgid; -- -- get_cpu_var(sockets_in_use)++; -- put_cpu_var(sockets_in_use); -- return sock; --} -- --/* -- * In theory you can't get an open on this inode, but /proc provides -- * a back door. Remember to keep it shut otherwise you'll let the -- * creepy crawlies in. -- */ -- --static int sock_no_open(struct inode *irrelevant, struct file *dontcare) --{ -- return -ENXIO; --} -- --const struct file_operations bad_sock_fops = { -- .owner = THIS_MODULE, -- .open = sock_no_open, --}; -- --/** -- * sock_release - close a socket -- * @sock: socket to close -- * -- * The socket is released from the protocol stack if it has a release -- * callback, and the inode is then released if the socket is bound to -- * an inode not a file. -- */ -- --void sock_release(struct socket *sock) --{ -- if (sock->ops) { -- struct module *owner = sock->ops->owner; -- -- sock->ops->release(sock); -- sock->ops = NULL; -- module_put(owner); -- } -- -- if (sock->fasync_list) -- printk(KERN_ERR "sock_release: fasync list not empty!\n"); -- -- get_cpu_var(sockets_in_use)--; -- put_cpu_var(sockets_in_use); -- if (!sock->file) { -- iput(SOCK_INODE(sock)); -- return; -- } -- sock->file = NULL; --} -- --static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, -- struct msghdr *msg, size_t size) --{ -- struct sock_iocb *si = kiocb_to_siocb(iocb); -- int err; -- -- si->sock = sock; -- si->scm = NULL; -- si->msg = msg; -- si->size = size; -- -- err = security_socket_sendmsg(sock, msg, size); -- if (err) -- return err; -- -- return sock->ops->sendmsg(iocb, sock, msg, size); --} -- --int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) --{ -- struct kiocb iocb; -- struct sock_iocb siocb; -- int ret; -- -- init_sync_kiocb(&iocb, NULL); -- iocb.private = &siocb; -- ret = __sock_sendmsg(&iocb, sock, msg, size); -- if (-EIOCBQUEUED == ret) -- ret = wait_on_sync_kiocb(&iocb); -- return ret; --} -- --int kernel_sendmsg(struct socket *sock, struct msghdr *msg, -- struct kvec *vec, size_t num, size_t size) --{ -- mm_segment_t oldfs = get_fs(); -- int result; -- -- set_fs(KERNEL_DS); -- /* -- * the following is safe, since for compiler definitions of kvec and -- * iovec are identical, yielding the same in-core layout and alignment -- */ -- msg->msg_iov = (struct iovec *)vec; -- msg->msg_iovlen = num; -- result = sock_sendmsg(sock, msg, size); -- set_fs(oldfs); -- return result; --} -- --/* -- * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) -- */ --void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, -- struct sk_buff *skb) --{ -- ktime_t kt = skb->tstamp; -- -- if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { -- struct timeval tv; -- /* Race occurred between timestamp enabling and packet -- receiving. Fill in the current time for now. */ -- if (kt.tv64 == 0) -- kt = ktime_get_real(); -- skb->tstamp = kt; -- tv = ktime_to_timeval(kt); -- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); -- } else { -- struct timespec ts; -- /* Race occurred between timestamp enabling and packet -- receiving. Fill in the current time for now. */ -- if (kt.tv64 == 0) -- kt = ktime_get_real(); -- skb->tstamp = kt; -- ts = ktime_to_timespec(kt); -- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); -- } --} -- --EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -- --static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, -- struct msghdr *msg, size_t size, int flags) --{ -- int err; -- struct sock_iocb *si = kiocb_to_siocb(iocb); -- -- si->sock = sock; -- si->scm = NULL; -- si->msg = msg; -- si->size = size; -- si->flags = flags; -- -- err = security_socket_recvmsg(sock, msg, size, flags); -- if (err) -- return err; -- -- return sock->ops->recvmsg(iocb, sock, msg, size, flags); --} -- --int sock_recvmsg(struct socket *sock, struct msghdr *msg, -- size_t size, int flags) --{ -- struct kiocb iocb; -- struct sock_iocb siocb; -- int ret; -- -- init_sync_kiocb(&iocb, NULL); -- iocb.private = &siocb; -- ret = __sock_recvmsg(&iocb, sock, msg, size, flags); -- if (-EIOCBQUEUED == ret) -- ret = wait_on_sync_kiocb(&iocb); -- return ret; --} -- --int kernel_recvmsg(struct socket *sock, struct msghdr *msg, -- struct kvec *vec, size_t num, size_t size, int flags) --{ -- mm_segment_t oldfs = get_fs(); -- int result; -- -- set_fs(KERNEL_DS); -- /* -- * the following is safe, since for compiler definitions of kvec and -- * iovec are identical, yielding the same in-core layout and alignment -- */ -- msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; -- result = sock_recvmsg(sock, msg, size, flags); -- set_fs(oldfs); -- return result; --} -- --static void sock_aio_dtor(struct kiocb *iocb) --{ -- kfree(iocb->private); --} -- --static ssize_t sock_sendpage(struct file *file, struct page *page, -- int offset, size_t size, loff_t *ppos, int more) --{ -- struct socket *sock; -- int flags; -- -- sock = file->private_data; -- -- flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; -- if (more) -- flags |= MSG_MORE; -- -- return sock->ops->sendpage(sock, page, offset, size, flags); --} -- --static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, -- struct sock_iocb *siocb) --{ -- if (!is_sync_kiocb(iocb)) { -- siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); -- if (!siocb) -- return NULL; -- iocb->ki_dtor = sock_aio_dtor; -- } -- -- siocb->kiocb = iocb; -- iocb->private = siocb; -- return siocb; --} -- --static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, -- struct file *file, const struct iovec *iov, -- unsigned long nr_segs) --{ -- struct socket *sock = file->private_data; -- size_t size = 0; -- int i; -- -- for (i = 0; i < nr_segs; i++) -- size += iov[i].iov_len; -- -- msg->msg_name = NULL; -- msg->msg_namelen = 0; -- msg->msg_control = NULL; -- msg->msg_controllen = 0; -- msg->msg_iov = (struct iovec *)iov; -- msg->msg_iovlen = nr_segs; -- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; -- -- return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); --} -- --static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, -- unsigned long nr_segs, loff_t pos) --{ -- struct sock_iocb siocb, *x; -- -- if (pos != 0) -- return -ESPIPE; -- -- if (iocb->ki_left == 0) /* Match SYS5 behaviour */ -- return 0; -- -- -- x = alloc_sock_iocb(iocb, &siocb); -- if (!x) -- return -ENOMEM; -- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); --} -- --static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, -- struct file *file, const struct iovec *iov, -- unsigned long nr_segs) --{ -- struct socket *sock = file->private_data; -- size_t size = 0; -- int i; -- -- for (i = 0; i < nr_segs; i++) -- size += iov[i].iov_len; -- -- msg->msg_name = NULL; -- msg->msg_namelen = 0; -- msg->msg_control = NULL; -- msg->msg_controllen = 0; -- msg->msg_iov = (struct iovec *)iov; -- msg->msg_iovlen = nr_segs; -- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; -- if (sock->type == SOCK_SEQPACKET) -- msg->msg_flags |= MSG_EOR; -- -- return __sock_sendmsg(iocb, sock, msg, size); --} -- --static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, -- unsigned long nr_segs, loff_t pos) --{ -- struct sock_iocb siocb, *x; -- -- if (pos != 0) -- return -ESPIPE; -- -- x = alloc_sock_iocb(iocb, &siocb); -- if (!x) -- return -ENOMEM; -- -- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); --} -- --/* -- * Atomic setting of ioctl hooks to avoid race -- * with module unload. -- */ -- --static DEFINE_MUTEX(br_ioctl_mutex); --static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; -- --void brioctl_set(int (*hook) (unsigned int, void __user *)) --{ -- mutex_lock(&br_ioctl_mutex); -- br_ioctl_hook = hook; -- mutex_unlock(&br_ioctl_mutex); --} -- --EXPORT_SYMBOL(brioctl_set); -- --static DEFINE_MUTEX(vlan_ioctl_mutex); --static int (*vlan_ioctl_hook) (void __user *arg); -- --void vlan_ioctl_set(int (*hook) (void __user *)) --{ -- mutex_lock(&vlan_ioctl_mutex); -- vlan_ioctl_hook = hook; -- mutex_unlock(&vlan_ioctl_mutex); --} -- --EXPORT_SYMBOL(vlan_ioctl_set); -- --static DEFINE_MUTEX(dlci_ioctl_mutex); --static int (*dlci_ioctl_hook) (unsigned int, void __user *); -- --void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) --{ -- mutex_lock(&dlci_ioctl_mutex); -- dlci_ioctl_hook = hook; -- mutex_unlock(&dlci_ioctl_mutex); --} -- --EXPORT_SYMBOL(dlci_ioctl_set); -- --/* -- * With an ioctl, arg may well be a user mode pointer, but we don't know -- * what to do with it - that's up to the protocol still. -- */ -- --static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) --{ -- struct socket *sock; -- void __user *argp = (void __user *)arg; -- int pid, err; -- -- sock = file->private_data; -- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { -- err = dev_ioctl(cmd, argp); -- } else --#ifdef CONFIG_WIRELESS_EXT -- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { -- err = dev_ioctl(cmd, argp); -- } else --#endif /* CONFIG_WIRELESS_EXT */ -- switch (cmd) { -- case FIOSETOWN: -- case SIOCSPGRP: -- err = -EFAULT; -- if (get_user(pid, (int __user *)argp)) -- break; -- err = f_setown(sock->file, pid, 1); -- break; -- case FIOGETOWN: -- case SIOCGPGRP: -- err = put_user(f_getown(sock->file), -- (int __user *)argp); -- break; -- case SIOCGIFBR: -- case SIOCSIFBR: -- case SIOCBRADDBR: -- case SIOCBRDELBR: -- err = -ENOPKG; -- if (!br_ioctl_hook) -- request_module("bridge"); -- -- mutex_lock(&br_ioctl_mutex); -- if (br_ioctl_hook) -- err = br_ioctl_hook(cmd, argp); -- mutex_unlock(&br_ioctl_mutex); -- break; -- case SIOCGIFVLAN: -- case SIOCSIFVLAN: -- err = -ENOPKG; -- if (!vlan_ioctl_hook) -- request_module("8021q"); -- -- mutex_lock(&vlan_ioctl_mutex); -- if (vlan_ioctl_hook) -- err = vlan_ioctl_hook(argp); -- mutex_unlock(&vlan_ioctl_mutex); -- break; -- case SIOCADDDLCI: -- case SIOCDELDLCI: -- err = -ENOPKG; -- if (!dlci_ioctl_hook) -- request_module("dlci"); -- -- if (dlci_ioctl_hook) { -- mutex_lock(&dlci_ioctl_mutex); -- err = dlci_ioctl_hook(cmd, argp); -- mutex_unlock(&dlci_ioctl_mutex); -- } -- break; -- default: -- err = sock->ops->ioctl(sock, cmd, arg); -- -- /* -- * If this ioctl is unknown try to hand it down -- * to the NIC driver. -- */ -- if (err == -ENOIOCTLCMD) -- err = dev_ioctl(cmd, argp); -- break; -- } -- return err; --} -- --int sock_create_lite(int family, int type, int protocol, struct socket **res) --{ -- int err; -- struct socket *sock = NULL; -- -- err = security_socket_create(family, type, protocol, 1); -- if (err) -- goto out; -- -- sock = sock_alloc(); -- if (!sock) { -- err = -ENOMEM; -- goto out; -- } -- -- sock->type = type; -- err = security_socket_post_create(sock, family, type, protocol, 1); -- if (err) -- goto out_release; -- --out: -- *res = sock; -- return err; --out_release: -- sock_release(sock); -- sock = NULL; -- goto out; --} -- --/* No kernel lock held - perfect */ --static unsigned int sock_poll(struct file *file, poll_table *wait) --{ -- struct socket *sock; -- -- /* -- * We can't return errors to poll, so it's either yes or no. -- */ -- sock = file->private_data; -- return sock->ops->poll(file, sock, wait); --} -- --static int sock_mmap(struct file *file, struct vm_area_struct *vma) --{ -- struct socket *sock = file->private_data; -- -- return sock->ops->mmap(file, sock, vma); --} -- --static int sock_close(struct inode *inode, struct file *filp) --{ -- /* -- * It was possible the inode is NULL we were -- * closing an unfinished socket. -- */ -- -- if (!inode) { -- printk(KERN_DEBUG "sock_close: NULL inode\n"); -- return 0; -- } -- sock_fasync(-1, filp, 0); -- sock_release(SOCKET_I(inode)); -- return 0; --} -- --/* -- * Update the socket async list -- * -- * Fasync_list locking strategy. -- * -- * 1. fasync_list is modified only under process context socket lock -- * i.e. under semaphore. -- * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) -- * or under socket lock. -- * 3. fasync_list can be used from softirq context, so that -- * modification under socket lock have to be enhanced with -- * write_lock_bh(&sk->sk_callback_lock). -- * --ANK (990710) -- */ -- --static int sock_fasync(int fd, struct file *filp, int on) --{ -- struct fasync_struct *fa, *fna = NULL, **prev; -- struct socket *sock; -- struct sock *sk; -- -- if (on) { -- fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); -- if (fna == NULL) -- return -ENOMEM; -- } -- -- sock = filp->private_data; -- -- sk = sock->sk; -- if (sk == NULL) { -- kfree(fna); -- return -EINVAL; -- } -- -- lock_sock(sk); -- -- prev = &(sock->fasync_list); -- -- for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) -- if (fa->fa_file == filp) -- break; -- -- if (on) { -- if (fa != NULL) { -- write_lock_bh(&sk->sk_callback_lock); -- fa->fa_fd = fd; -- write_unlock_bh(&sk->sk_callback_lock); -- -- kfree(fna); -- goto out; -- } -- fna->fa_file = filp; -- fna->fa_fd = fd; -- fna->magic = FASYNC_MAGIC; -- fna->fa_next = sock->fasync_list; -- write_lock_bh(&sk->sk_callback_lock); -- sock->fasync_list = fna; -- write_unlock_bh(&sk->sk_callback_lock); -- } else { -- if (fa != NULL) { -- write_lock_bh(&sk->sk_callback_lock); -- *prev = fa->fa_next; -- write_unlock_bh(&sk->sk_callback_lock); -- kfree(fa); -- } -- } -- --out: -- release_sock(sock->sk); -- return 0; --} -- --/* This function may be called only under socket lock or callback_lock */ -- --int sock_wake_async(struct socket *sock, int how, int band) --{ -- if (!sock || !sock->fasync_list) -- return -1; -- switch (how) { -- case 1: -- -- if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) -- break; -- goto call_kill; -- case 2: -- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) -- break; -- /* fall through */ -- case 0: --call_kill: -- __kill_fasync(sock->fasync_list, SIGIO, band); -- break; -- case 3: -- __kill_fasync(sock->fasync_list, SIGURG, band); -- } -- return 0; --} -- --static int __sock_create(int family, int type, int protocol, -- struct socket **res, int kern) --{ -- int err; -- struct socket *sock; -- const struct net_proto_family *pf; -- -- /* -- * Check protocol is in range -- */ -- if (family < 0 || family >= NPROTO) -- return -EAFNOSUPPORT; -- if (type < 0 || type >= SOCK_MAX) -- return -EINVAL; -- -- /* Compatibility. -- -- This uglymoron is moved from INET layer to here to avoid -- deadlock in module load. -- */ -- if (family == PF_INET && type == SOCK_PACKET) { -- static int warned; -- if (!warned) { -- warned = 1; -- printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", -- current->comm); -- } -- family = PF_PACKET; -- } -- -- err = security_socket_create(family, type, protocol, kern); -- if (err) -- return err; -- -- /* -- * Allocate the socket and allow the family to set things up. if -- * the protocol is 0, the family is instructed to select an appropriate -- * default. -- */ -- sock = sock_alloc(); -- if (!sock) { -- if (net_ratelimit()) -- printk(KERN_WARNING "socket: no more sockets\n"); -- return -ENFILE; /* Not exactly a match, but its the -- closest posix thing */ -- } -- -- sock->type = type; -- --#if defined(CONFIG_KMOD) -- /* Attempt to load a protocol module if the find failed. -- * -- * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user -- * requested real, full-featured networking support upon configuration. -- * Otherwise module support will break! -- */ -- if (net_families[family] == NULL) -- request_module("net-pf-%d", family); --#endif -- -- rcu_read_lock(); -- pf = rcu_dereference(net_families[family]); -- err = -EAFNOSUPPORT; -- if (!pf) -- goto out_release; -- -- /* -- * We will call the ->create function, that possibly is in a loadable -- * module, so we have to bump that loadable module refcnt first. -- */ -- if (!try_module_get(pf->owner)) -- goto out_release; -- -- /* Now protected by module ref count */ -- rcu_read_unlock(); -- -- err = pf->create(sock, protocol); -- if (err < 0) -- goto out_module_put; -- -- /* -- * Now to bump the refcnt of the [loadable] module that owns this -- * socket at sock_release time we decrement its refcnt. -- */ -- if (!try_module_get(sock->ops->owner)) -- goto out_module_busy; -- -- /* -- * Now that we're done with the ->create function, the [loadable] -- * module can have its refcnt decremented -- */ -- module_put(pf->owner); -- err = security_socket_post_create(sock, family, type, protocol, kern); -- if (err) -- goto out_sock_release; -- *res = sock; -- -- return 0; -- --out_module_busy: -- err = -EAFNOSUPPORT; --out_module_put: -- sock->ops = NULL; -- module_put(pf->owner); --out_sock_release: -- sock_release(sock); -- return err; -- --out_release: -- rcu_read_unlock(); -- goto out_sock_release; --} -- --int sock_create(int family, int type, int protocol, struct socket **res) --{ -- return __sock_create(family, type, protocol, res, 0); --} -- --int sock_create_kern(int family, int type, int protocol, struct socket **res) --{ -- return __sock_create(family, type, protocol, res, 1); --} -- --asmlinkage long sys_socket(int family, int type, int protocol) --{ -- int retval; -- struct socket *sock; -- -- retval = sock_create(family, type, protocol, &sock); -- if (retval < 0) -- goto out; -- -- retval = sock_map_fd(sock); -- if (retval < 0) -- goto out_release; -- --out: -- /* It may be already another descriptor 8) Not kernel problem. */ -- return retval; -- --out_release: -- sock_release(sock); -- return retval; --} -- --/* -- * Create a pair of connected sockets. -- */ -- --asmlinkage long sys_socketpair(int family, int type, int protocol, -- int __user *usockvec) --{ -- struct socket *sock1, *sock2; -- int fd1, fd2, err; -- struct file *newfile1, *newfile2; -- -- /* -- * Obtain the first socket and check if the underlying protocol -- * supports the socketpair call. -- */ -- -- err = sock_create(family, type, protocol, &sock1); -- if (err < 0) -- goto out; -- -- err = sock_create(family, type, protocol, &sock2); -- if (err < 0) -- goto out_release_1; -- -- err = sock1->ops->socketpair(sock1, sock2); -- if (err < 0) -- goto out_release_both; -- -- fd1 = sock_alloc_fd(&newfile1); -- if (unlikely(fd1 < 0)) { -- err = fd1; -- goto out_release_both; -- } -- -- fd2 = sock_alloc_fd(&newfile2); -- if (unlikely(fd2 < 0)) { -- err = fd2; -- put_filp(newfile1); -- put_unused_fd(fd1); -- goto out_release_both; -- } -- -- err = sock_attach_fd(sock1, newfile1); -- if (unlikely(err < 0)) { -- goto out_fd2; -- } -- -- err = sock_attach_fd(sock2, newfile2); -- if (unlikely(err < 0)) { -- fput(newfile1); -- goto out_fd1; -- } -- -- err = audit_fd_pair(fd1, fd2); -- if (err < 0) { -- fput(newfile1); -- fput(newfile2); -- goto out_fd; -- } -- -- fd_install(fd1, newfile1); -- fd_install(fd2, newfile2); -- /* fd1 and fd2 may be already another descriptors. -- * Not kernel problem. -- */ -- -- err = put_user(fd1, &usockvec[0]); -- if (!err) -- err = put_user(fd2, &usockvec[1]); -- if (!err) -- return 0; -- -- sys_close(fd2); -- sys_close(fd1); -- return err; -- --out_release_both: -- sock_release(sock2); --out_release_1: -- sock_release(sock1); --out: -- return err; -- --out_fd2: -- put_filp(newfile1); -- sock_release(sock1); --out_fd1: -- put_filp(newfile2); -- sock_release(sock2); --out_fd: -- put_unused_fd(fd1); -- put_unused_fd(fd2); -- goto out; --} -- --/* -- * Bind a name to a socket. Nothing much to do here since it's -- * the protocol's responsibility to handle the local address. -- * -- * We move the socket address to kernel space before we call -- * the protocol layer (having also checked the address is ok). -- */ -- --asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) --{ -- struct socket *sock; -- char address[MAX_SOCK_ADDR]; -- int err, fput_needed; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (sock) { -- err = move_addr_to_kernel(umyaddr, addrlen, address); -- if (err >= 0) { -- err = security_socket_bind(sock, -- (struct sockaddr *)address, -- addrlen); -- if (!err) -- err = sock->ops->bind(sock, -- (struct sockaddr *) -- address, addrlen); -- } -- fput_light(sock->file, fput_needed); -- } -- return err; --} -- --/* -- * Perform a listen. Basically, we allow the protocol to do anything -- * necessary for a listen, and if that works, we mark the socket as -- * ready for listening. -- */ -- --int sysctl_somaxconn __read_mostly = SOMAXCONN; -- --asmlinkage long sys_listen(int fd, int backlog) --{ -- struct socket *sock; -- int err, fput_needed; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (sock) { -- if ((unsigned)backlog > sysctl_somaxconn) -- backlog = sysctl_somaxconn; -- -- err = security_socket_listen(sock, backlog); -- if (!err) -- err = sock->ops->listen(sock, backlog); -- -- fput_light(sock->file, fput_needed); -- } -- return err; --} -- --/* -- * For accept, we attempt to create a new socket, set up the link -- * with the client, wake up the client, then return the new -- * connected fd. We collect the address of the connector in kernel -- * space and move it to user at the very end. This is unclean because -- * we open the socket then return an error. -- * -- * 1003.1g adds the ability to recvmsg() to query connection pending -- * status to recvmsg. We need to add that support in a way thats -- * clean when we restucture accept also. -- */ -- --asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, -- int __user *upeer_addrlen) --{ -- struct socket *sock, *newsock; -- struct file *newfile; -- int err, len, newfd, fput_needed; -- char address[MAX_SOCK_ADDR]; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (!sock) -- goto out; -- -- err = -ENFILE; -- if (!(newsock = sock_alloc())) -- goto out_put; -- -- newsock->type = sock->type; -- newsock->ops = sock->ops; -- -- /* -- * We don't need try_module_get here, as the listening socket (sock) -- * has the protocol module (sock->ops->owner) held. -- */ -- __module_get(newsock->ops->owner); -- -- newfd = sock_alloc_fd(&newfile); -- if (unlikely(newfd < 0)) { -- err = newfd; -- sock_release(newsock); -- goto out_put; -- } -- -- err = sock_attach_fd(newsock, newfile); -- if (err < 0) -- goto out_fd_simple; -- -- err = security_socket_accept(sock, newsock); -- if (err) -- goto out_fd; -- -- err = sock->ops->accept(sock, newsock, sock->file->f_flags); -- if (err < 0) -- goto out_fd; -- -- if (upeer_sockaddr) { -- if (newsock->ops->getname(newsock, (struct sockaddr *)address, -- &len, 2) < 0) { -- err = -ECONNABORTED; -- goto out_fd; -- } -- err = move_addr_to_user(address, len, upeer_sockaddr, -- upeer_addrlen); -- if (err < 0) -- goto out_fd; -- } -- -- /* File flags are not inherited via accept() unlike another OSes. */ -- -- fd_install(newfd, newfile); -- err = newfd; -- -- security_socket_post_accept(sock, newsock); -- --out_put: -- fput_light(sock->file, fput_needed); --out: -- return err; --out_fd_simple: -- sock_release(newsock); -- put_filp(newfile); -- put_unused_fd(newfd); -- goto out_put; --out_fd: -- fput(newfile); -- put_unused_fd(newfd); -- goto out_put; --} -- --/* -- * Attempt to connect to a socket with the server address. The address -- * is in user space so we verify it is OK and move it to kernel space. -- * -- * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to -- * break bindings -- * -- * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and -- * other SEQPACKET protocols that take time to connect() as it doesn't -- * include the -EINPROGRESS status for such sockets. -- */ -- --asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, -- int addrlen) --{ -- struct socket *sock; -- char address[MAX_SOCK_ADDR]; -- int err, fput_needed; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (!sock) -- goto out; -- err = move_addr_to_kernel(uservaddr, addrlen, address); -- if (err < 0) -- goto out_put; -- -- err = -- security_socket_connect(sock, (struct sockaddr *)address, addrlen); -- if (err) -- goto out_put; -- -- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, -- sock->file->f_flags); --out_put: -- fput_light(sock->file, fput_needed); --out: -- return err; --} -- --/* -- * Get the local address ('name') of a socket object. Move the obtained -- * name to user space. -- */ -- --asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, -- int __user *usockaddr_len) --{ -- struct socket *sock; -- char address[MAX_SOCK_ADDR]; -- int len, err, fput_needed; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (!sock) -- goto out; -- -- err = security_socket_getsockname(sock); -- if (err) -- goto out_put; -- -- err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); -- if (err) -- goto out_put; -- err = move_addr_to_user(address, len, usockaddr, usockaddr_len); -- --out_put: -- fput_light(sock->file, fput_needed); --out: -- return err; --} -- --/* -- * Get the remote address ('name') of a socket object. Move the obtained -- * name to user space. -- */ -- --asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, -- int __user *usockaddr_len) --{ -- struct socket *sock; -- char address[MAX_SOCK_ADDR]; -- int len, err, fput_needed; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (sock != NULL) { -- err = security_socket_getpeername(sock); -- if (err) { -- fput_light(sock->file, fput_needed); -- return err; -- } -- -- err = -- sock->ops->getname(sock, (struct sockaddr *)address, &len, -- 1); -- if (!err) -- err = move_addr_to_user(address, len, usockaddr, -- usockaddr_len); -- fput_light(sock->file, fput_needed); -- } -- return err; --} -- --/* -- * Send a datagram to a given address. We move the address into kernel -- * space and check the user space data area is readable before invoking -- * the protocol. -- */ -- --asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, -- unsigned flags, struct sockaddr __user *addr, -- int addr_len) --{ -- struct socket *sock; -- char address[MAX_SOCK_ADDR]; -- int err; -- struct msghdr msg; -- struct iovec iov; -- int fput_needed; -- struct file *sock_file; -- -- sock_file = fget_light(fd, &fput_needed); -- err = -EBADF; -- if (!sock_file) -- goto out; -- -- sock = sock_from_file(sock_file, &err); -- if (!sock) -- goto out_put; -- iov.iov_base = buff; -- iov.iov_len = len; -- msg.msg_name = NULL; -- msg.msg_iov = &iov; -- msg.msg_iovlen = 1; -- msg.msg_control = NULL; -- msg.msg_controllen = 0; -- msg.msg_namelen = 0; -- if (addr) { -- err = move_addr_to_kernel(addr, addr_len, address); -- if (err < 0) -- goto out_put; -- msg.msg_name = address; -- msg.msg_namelen = addr_len; -- } -- if (sock->file->f_flags & O_NONBLOCK) -- flags |= MSG_DONTWAIT; -- msg.msg_flags = flags; -- err = sock_sendmsg(sock, &msg, len); -- --out_put: -- fput_light(sock_file, fput_needed); --out: -- return err; --} -- --/* -- * Send a datagram down a socket. -- */ -- --asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) --{ -- return sys_sendto(fd, buff, len, flags, NULL, 0); --} -- --/* -- * Receive a frame from the socket and optionally record the address of the -- * sender. We verify the buffers are writable and if needed move the -- * sender address from kernel to user space. -- */ -- --asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, -- unsigned flags, struct sockaddr __user *addr, -- int __user *addr_len) --{ -- struct socket *sock; -- struct iovec iov; -- struct msghdr msg; -- char address[MAX_SOCK_ADDR]; -- int err, err2; -- struct file *sock_file; -- int fput_needed; -- -- sock_file = fget_light(fd, &fput_needed); -- err = -EBADF; -- if (!sock_file) -- goto out; -- -- sock = sock_from_file(sock_file, &err); -- if (!sock) -- goto out_put; -- -- msg.msg_control = NULL; -- msg.msg_controllen = 0; -- msg.msg_iovlen = 1; -- msg.msg_iov = &iov; -- iov.iov_len = size; -- iov.iov_base = ubuf; -- msg.msg_name = address; -- msg.msg_namelen = MAX_SOCK_ADDR; -- if (sock->file->f_flags & O_NONBLOCK) -- flags |= MSG_DONTWAIT; -- err = sock_recvmsg(sock, &msg, size, flags); -- -- if (err >= 0 && addr != NULL) { -- err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); -- if (err2 < 0) -- err = err2; -- } --out_put: -- fput_light(sock_file, fput_needed); --out: -- return err; --} -- --/* -- * Receive a datagram from a socket. -- */ -- --asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, -- unsigned flags) --{ -- return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); --} -- --/* -- * Set a socket option. Because we don't know the option lengths we have -- * to pass the user mode parameter for the protocols to sort out. -- */ -- --asmlinkage long sys_setsockopt(int fd, int level, int optname, -- char __user *optval, int optlen) --{ -- int err, fput_needed; -- struct socket *sock; -- -- if (optlen < 0) -- return -EINVAL; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (sock != NULL) { -- err = security_socket_setsockopt(sock, level, optname); -- if (err) -- goto out_put; -- -- if (level == SOL_SOCKET) -- err = -- sock_setsockopt(sock, level, optname, optval, -- optlen); -- else -- err = -- sock->ops->setsockopt(sock, level, optname, optval, -- optlen); --out_put: -- fput_light(sock->file, fput_needed); -- } -- return err; --} -- --/* -- * Get a socket option. Because we don't know the option lengths we have -- * to pass a user mode parameter for the protocols to sort out. -- */ -- --asmlinkage long sys_getsockopt(int fd, int level, int optname, -- char __user *optval, int __user *optlen) --{ -- int err, fput_needed; -- struct socket *sock; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (sock != NULL) { -- err = security_socket_getsockopt(sock, level, optname); -- if (err) -- goto out_put; -- -- if (level == SOL_SOCKET) -- err = -- sock_getsockopt(sock, level, optname, optval, -- optlen); -- else -- err = -- sock->ops->getsockopt(sock, level, optname, optval, -- optlen); --out_put: -- fput_light(sock->file, fput_needed); -- } -- return err; --} -- --/* -- * Shutdown a socket. -- */ -- --asmlinkage long sys_shutdown(int fd, int how) --{ -- int err, fput_needed; -- struct socket *sock; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (sock != NULL) { -- err = security_socket_shutdown(sock, how); -- if (!err) -- err = sock->ops->shutdown(sock, how); -- fput_light(sock->file, fput_needed); -- } -- return err; --} -- --/* A couple of helpful macros for getting the address of the 32/64 bit -- * fields which are the same type (int / unsigned) on our platforms. -- */ --#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) --#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) --#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) -- --/* -- * BSD sendmsg interface -- */ -- --asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) --{ -- struct compat_msghdr __user *msg_compat = -- (struct compat_msghdr __user *)msg; -- struct socket *sock; -- char address[MAX_SOCK_ADDR]; -- struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; -- unsigned char ctl[sizeof(struct cmsghdr) + 20] -- __attribute__ ((aligned(sizeof(__kernel_size_t)))); -- /* 20 is size of ipv6_pktinfo */ -- unsigned char *ctl_buf = ctl; -- struct msghdr msg_sys; -- int err, ctl_len, iov_size, total_len; -- int fput_needed; -- -- err = -EFAULT; -- if (MSG_CMSG_COMPAT & flags) { -- if (get_compat_msghdr(&msg_sys, msg_compat)) -- return -EFAULT; -- } -- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) -- return -EFAULT; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (!sock) -- goto out; -- -- /* do not move before msg_sys is valid */ -- err = -EMSGSIZE; -- if (msg_sys.msg_iovlen > UIO_MAXIOV) -- goto out_put; -- -- /* Check whether to allocate the iovec area */ -- err = -ENOMEM; -- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); -- if (msg_sys.msg_iovlen > UIO_FASTIOV) { -- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); -- if (!iov) -- goto out_put; -- } -- -- /* This will also move the address data into kernel space */ -- if (MSG_CMSG_COMPAT & flags) { -- err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); -- } else -- err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); -- if (err < 0) -- goto out_freeiov; -- total_len = err; -- -- err = -ENOBUFS; -- -- if (msg_sys.msg_controllen > INT_MAX) -- goto out_freeiov; -- ctl_len = msg_sys.msg_controllen; -- if ((MSG_CMSG_COMPAT & flags) && ctl_len) { -- err = -- cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, -- sizeof(ctl)); -- if (err) -- goto out_freeiov; -- ctl_buf = msg_sys.msg_control; -- ctl_len = msg_sys.msg_controllen; -- } else if (ctl_len) { -- if (ctl_len > sizeof(ctl)) { -- ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); -- if (ctl_buf == NULL) -- goto out_freeiov; -- } -- err = -EFAULT; -- /* -- * Careful! Before this, msg_sys.msg_control contains a user pointer. -- * Afterwards, it will be a kernel pointer. Thus the compiler-assisted -- * checking falls down on this. -- */ -- if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, -- ctl_len)) -- goto out_freectl; -- msg_sys.msg_control = ctl_buf; -- } -- msg_sys.msg_flags = flags; -- -- if (sock->file->f_flags & O_NONBLOCK) -- msg_sys.msg_flags |= MSG_DONTWAIT; -- err = sock_sendmsg(sock, &msg_sys, total_len); -- --out_freectl: -- if (ctl_buf != ctl) -- sock_kfree_s(sock->sk, ctl_buf, ctl_len); --out_freeiov: -- if (iov != iovstack) -- sock_kfree_s(sock->sk, iov, iov_size); --out_put: -- fput_light(sock->file, fput_needed); --out: -- return err; --} -- --/* -- * BSD recvmsg interface -- */ -- --asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, -- unsigned int flags) --{ -- struct compat_msghdr __user *msg_compat = -- (struct compat_msghdr __user *)msg; -- struct socket *sock; -- struct iovec iovstack[UIO_FASTIOV]; -- struct iovec *iov = iovstack; -- struct msghdr msg_sys; -- unsigned long cmsg_ptr; -- int err, iov_size, total_len, len; -- int fput_needed; -- -- /* kernel mode address */ -- char addr[MAX_SOCK_ADDR]; -- -- /* user mode address pointers */ -- struct sockaddr __user *uaddr; -- int __user *uaddr_len; -- -- if (MSG_CMSG_COMPAT & flags) { -- if (get_compat_msghdr(&msg_sys, msg_compat)) -- return -EFAULT; -- } -- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) -- return -EFAULT; -- -- sock = sockfd_lookup_light(fd, &err, &fput_needed); -- if (!sock) -- goto out; -- -- err = -EMSGSIZE; -- if (msg_sys.msg_iovlen > UIO_MAXIOV) -- goto out_put; -- -- /* Check whether to allocate the iovec area */ -- err = -ENOMEM; -- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); -- if (msg_sys.msg_iovlen > UIO_FASTIOV) { -- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); -- if (!iov) -- goto out_put; -- } -- -- /* -- * Save the user-mode address (verify_iovec will change the -- * kernel msghdr to use the kernel address space) -- */ -- -- uaddr = (void __user *)msg_sys.msg_name; -- uaddr_len = COMPAT_NAMELEN(msg); -- if (MSG_CMSG_COMPAT & flags) { -- err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); -- } else -- err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); -- if (err < 0) -- goto out_freeiov; -- total_len = err; -- -- cmsg_ptr = (unsigned long)msg_sys.msg_control; -- msg_sys.msg_flags = 0; -- if (MSG_CMSG_COMPAT & flags) -- msg_sys.msg_flags = MSG_CMSG_COMPAT; -- -- if (sock->file->f_flags & O_NONBLOCK) -- flags |= MSG_DONTWAIT; -- err = sock_recvmsg(sock, &msg_sys, total_len, flags); -- if (err < 0) -- goto out_freeiov; -- len = err; -- -- if (uaddr != NULL) { -- err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, -- uaddr_len); -- if (err < 0) -- goto out_freeiov; -- } -- err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), -- COMPAT_FLAGS(msg)); -- if (err) -- goto out_freeiov; -- if (MSG_CMSG_COMPAT & flags) -- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, -- &msg_compat->msg_controllen); -- else -- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, -- &msg->msg_controllen); -- if (err) -- goto out_freeiov; -- err = len; -- --out_freeiov: -- if (iov != iovstack) -- sock_kfree_s(sock->sk, iov, iov_size); --out_put: -- fput_light(sock->file, fput_needed); --out: -- return err; --} -- --#ifdef __ARCH_WANT_SYS_SOCKETCALL -- --/* Argument list sizes for sys_socketcall */ --#define AL(x) ((x) * sizeof(unsigned long)) --static const unsigned char nargs[18]={ -- AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), -- AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), -- AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) --}; -- --#undef AL -- --/* -- * System call vectors. -- * -- * Argument checking cleaned up. Saved 20% in size. -- * This function doesn't need to set the kernel lock because -- * it is set by the callees. -- */ -- --asmlinkage long sys_socketcall(int call, unsigned long __user *args) --{ -- unsigned long a[6]; -- unsigned long a0, a1; -- int err; -- -- if (call < 1 || call > SYS_RECVMSG) -- return -EINVAL; -- -- /* copy_from_user should be SMP safe. */ -- if (copy_from_user(a, args, nargs[call])) -- return -EFAULT; -- -- err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); -- if (err) -- return err; -- -- a0 = a[0]; -- a1 = a[1]; -- -- switch (call) { -- case SYS_SOCKET: -- err = sys_socket(a0, a1, a[2]); -- break; -- case SYS_BIND: -- err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); -- break; -- case SYS_CONNECT: -- err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); -- break; -- case SYS_LISTEN: -- err = sys_listen(a0, a1); -- break; -- case SYS_ACCEPT: -- err = -- sys_accept(a0, (struct sockaddr __user *)a1, -- (int __user *)a[2]); -- break; -- case SYS_GETSOCKNAME: -- err = -- sys_getsockname(a0, (struct sockaddr __user *)a1, -- (int __user *)a[2]); -- break; -- case SYS_GETPEERNAME: -- err = -- sys_getpeername(a0, (struct sockaddr __user *)a1, -- (int __user *)a[2]); -- break; -- case SYS_SOCKETPAIR: -- err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); -- break; -- case SYS_SEND: -- err = sys_send(a0, (void __user *)a1, a[2], a[3]); -- break; -- case SYS_SENDTO: -- err = sys_sendto(a0, (void __user *)a1, a[2], a[3], -- (struct sockaddr __user *)a[4], a[5]); -- break; -- case SYS_RECV: -- err = sys_recv(a0, (void __user *)a1, a[2], a[3]); -- break; -- case SYS_RECVFROM: -- err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], -- (struct sockaddr __user *)a[4], -- (int __user *)a[5]); -- break; -- case SYS_SHUTDOWN: -- err = sys_shutdown(a0, a1); -- break; -- case SYS_SETSOCKOPT: -- err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); -- break; -- case SYS_GETSOCKOPT: -- err = -- sys_getsockopt(a0, a1, a[2], (char __user *)a[3], -- (int __user *)a[4]); -- break; -- case SYS_SENDMSG: -- err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); -- break; -- case SYS_RECVMSG: -- err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); -- break; -- default: -- err = -EINVAL; -- break; -- } -- return err; --} -- --#endif /* __ARCH_WANT_SYS_SOCKETCALL */ -- --/** -- * sock_register - add a socket protocol handler -- * @ops: description of protocol -- * -- * This function is called by a protocol handler that wants to -- * advertise its address family, and have it linked into the -- * socket interface. The value ops->family coresponds to the -- * socket system call protocol family. -- */ --int sock_register(const struct net_proto_family *ops) --{ -- int err; -- -- if (ops->family >= NPROTO) { -- printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, -- NPROTO); -- return -ENOBUFS; -- } -- -- spin_lock(&net_family_lock); -- if (net_families[ops->family]) -- err = -EEXIST; -- else { -- net_families[ops->family] = ops; -- err = 0; -- } -- spin_unlock(&net_family_lock); -- -- printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); -- return err; --} -- --/** -- * sock_unregister - remove a protocol handler -- * @family: protocol family to remove -- * -- * This function is called by a protocol handler that wants to -- * remove its address family, and have it unlinked from the -- * new socket creation. -- * -- * If protocol handler is a module, then it can use module reference -- * counts to protect against new references. If protocol handler is not -- * a module then it needs to provide its own protection in -- * the ops->create routine. -- */ --void sock_unregister(int family) --{ -- BUG_ON(family < 0 || family >= NPROTO); -- -- spin_lock(&net_family_lock); -- net_families[family] = NULL; -- spin_unlock(&net_family_lock); -- -- synchronize_rcu(); -- -- printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); --} -- --static int __init sock_init(void) --{ -- /* -- * Initialize sock SLAB cache. -- */ -- -- sk_init(); -- -- /* -- * Initialize skbuff SLAB cache -- */ -- skb_init(); -- -- /* -- * Initialize the protocols module. -- */ -- -- init_inodecache(); -- register_filesystem(&sock_fs_type); -- sock_mnt = kern_mount(&sock_fs_type); -- -- /* The real protocol initialization is performed in later initcalls. -- */ -- --#ifdef CONFIG_NETFILTER -- netfilter_init(); --#endif -- -- return 0; --} -- --core_initcall(sock_init); /* early initcall */ -- --#ifdef CONFIG_PROC_FS --void socket_seq_show(struct seq_file *seq) --{ -- int cpu; -- int counter = 0; -- -- for_each_possible_cpu(cpu) -- counter += per_cpu(sockets_in_use, cpu); -- -- /* It can be negative, by the way. 8) */ -- if (counter < 0) -- counter = 0; -- -- seq_printf(seq, "sockets: used %d\n", counter); --} --#endif /* CONFIG_PROC_FS */ -- --#ifdef CONFIG_COMPAT --static long compat_sock_ioctl(struct file *file, unsigned cmd, -- unsigned long arg) --{ -- struct socket *sock = file->private_data; -- int ret = -ENOIOCTLCMD; -- -- if (sock->ops->compat_ioctl) -- ret = sock->ops->compat_ioctl(sock, cmd, arg); -- -- return ret; --} --#endif -- --int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) --{ -- return sock->ops->bind(sock, addr, addrlen); --} -- --int kernel_listen(struct socket *sock, int backlog) --{ -- return sock->ops->listen(sock, backlog); --} -- --int kernel_accept(struct socket *sock, struct socket **newsock, int flags) --{ -- struct sock *sk = sock->sk; -- int err; -- -- err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, -- newsock); -- if (err < 0) -- goto done; -- -- err = sock->ops->accept(sock, *newsock, flags); -- if (err < 0) { -- sock_release(*newsock); -- goto done; -- } -- -- (*newsock)->ops = sock->ops; -- --done: -- return err; --} -- --int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, -- int flags) --{ -- return sock->ops->connect(sock, addr, addrlen, flags); --} -- --int kernel_getsockname(struct socket *sock, struct sockaddr *addr, -- int *addrlen) --{ -- return sock->ops->getname(sock, addr, addrlen, 0); --} -- --int kernel_getpeername(struct socket *sock, struct sockaddr *addr, -- int *addrlen) --{ -- return sock->ops->getname(sock, addr, addrlen, 1); --} -- --int kernel_getsockopt(struct socket *sock, int level, int optname, -- char *optval, int *optlen) --{ -- mm_segment_t oldfs = get_fs(); -- int err; -- -- set_fs(KERNEL_DS); -- if (level == SOL_SOCKET) -- err = sock_getsockopt(sock, level, optname, optval, optlen); -- else -- err = sock->ops->getsockopt(sock, level, optname, optval, -- optlen); -- set_fs(oldfs); -- return err; --} -- --int kernel_setsockopt(struct socket *sock, int level, int optname, -- char *optval, int optlen) --{ -- mm_segment_t oldfs = get_fs(); -- int err; -- -- set_fs(KERNEL_DS); -- if (level == SOL_SOCKET) -- err = sock_setsockopt(sock, level, optname, optval, optlen); -- else -- err = sock->ops->setsockopt(sock, level, optname, optval, -- optlen); -- set_fs(oldfs); -- return err; --} -- --int kernel_sendpage(struct socket *sock, struct page *page, int offset, -- size_t size, int flags) --{ -- if (sock->ops->sendpage) -- return sock->ops->sendpage(sock, page, offset, size, flags); -- -- return sock_no_sendpage(sock, page, offset, size, flags); --} -- --int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) --{ -- mm_segment_t oldfs = get_fs(); -- int err; -- -- set_fs(KERNEL_DS); -- err = sock->ops->ioctl(sock, cmd, arg); -- set_fs(oldfs); -- -- return err; --} -- --/* ABI emulation layers need these two */ --EXPORT_SYMBOL(move_addr_to_kernel); --EXPORT_SYMBOL(move_addr_to_user); --EXPORT_SYMBOL(sock_create); --EXPORT_SYMBOL(sock_create_kern); --EXPORT_SYMBOL(sock_create_lite); --EXPORT_SYMBOL(sock_map_fd); --EXPORT_SYMBOL(sock_recvmsg); --EXPORT_SYMBOL(sock_register); --EXPORT_SYMBOL(sock_release); --EXPORT_SYMBOL(sock_sendmsg); --EXPORT_SYMBOL(sock_unregister); --EXPORT_SYMBOL(sock_wake_async); --EXPORT_SYMBOL(sockfd_lookup); --EXPORT_SYMBOL(kernel_sendmsg); --EXPORT_SYMBOL(kernel_recvmsg); --EXPORT_SYMBOL(kernel_bind); --EXPORT_SYMBOL(kernel_listen); --EXPORT_SYMBOL(kernel_accept); --EXPORT_SYMBOL(kernel_connect); --EXPORT_SYMBOL(kernel_getsockname); --EXPORT_SYMBOL(kernel_getpeername); --EXPORT_SYMBOL(kernel_getsockopt); --EXPORT_SYMBOL(kernel_setsockopt); --EXPORT_SYMBOL(kernel_sendpage); --EXPORT_SYMBOL(kernel_sock_ioctl); diff -Nurb linux-2.6.22-570/net/sunrpc/auth.c linux-2.6.22-590/net/sunrpc/auth.c --- linux-2.6.22-570/net/sunrpc/auth.c 2008-03-20 13:25:46.000000000 -0400 +++ linux-2.6.22-590/net/sunrpc/auth.c 2008-03-20 13:28:08.000000000 -0400 @@ -202606,494 +188910,6 @@ diff -Nurb linux-2.6.22-570/rej linux-2.6.22-590/rej +vi -o ./net/bridge/br_if.c ./net/bridge/br_if.c.rej +vi -o ./net/sunrpc/auth_unix.c ./net/sunrpc/auth_unix.c.rej +vi -o ./scripts/checksyscalls.sh ./scripts/checksyscalls.sh.rej -diff -Nurb linux-2.6.22-570/scripts/Makefile.build.orig linux-2.6.22-590/scripts/Makefile.build.orig ---- linux-2.6.22-570/scripts/Makefile.build.orig 2007-07-08 19:32:17.000000000 -0400 -+++ linux-2.6.22-590/scripts/Makefile.build.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,348 +0,0 @@ --# ========================================================================== --# Building --# ========================================================================== -- --src := $(obj) -- --PHONY := __build --__build: -- --# Read .config if it exist, otherwise ignore ---include include/config/auto.conf -- --include scripts/Kbuild.include -- --# The filename Kbuild has precedence over Makefile --kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) --include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile) -- --include scripts/Makefile.lib -- --ifdef host-progs --ifneq ($(hostprogs-y),$(host-progs)) --$(warning kbuild: $(obj)/Makefile - Usage of host-progs is deprecated. Please replace with hostprogs-y!) --hostprogs-y += $(host-progs) --endif --endif -- --# Do not include host rules unles needed --ifneq ($(hostprogs-y)$(hostprogs-m),) --include scripts/Makefile.host --endif -- --ifneq ($(KBUILD_SRC),) --# Create output directory if not already present --_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj)) -- --# Create directories for object files if directory does not exist --# Needed when obj-y := dir/file.o syntax is used --_dummy := $(foreach d,$(obj-dirs), $(shell [ -d $(d) ] || mkdir -p $(d))) --endif -- -- --ifdef EXTRA_TARGETS --$(warning kbuild: $(obj)/Makefile - Usage of EXTRA_TARGETS is obsolete in 2.6. Please fix!) --endif -- --ifdef build-targets --$(warning kbuild: $(obj)/Makefile - Usage of build-targets is obsolete in 2.6. Please fix!) --endif -- --ifdef export-objs --$(warning kbuild: $(obj)/Makefile - Usage of export-objs is obsolete in 2.6. Please fix!) --endif -- --ifdef O_TARGET --$(warning kbuild: $(obj)/Makefile - Usage of O_TARGET := $(O_TARGET) is obsolete in 2.6. Please fix!) --endif -- --ifdef L_TARGET --$(error kbuild: $(obj)/Makefile - Use of L_TARGET is replaced by lib-y in 2.6. Please fix!) --endif -- --ifdef list-multi --$(warning kbuild: $(obj)/Makefile - list-multi := $(list-multi) is obsolete in 2.6. Please fix!) --endif -- --ifndef obj --$(warning kbuild: Makefile.build is included improperly) --endif -- --# =========================================================================== -- --ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),) --lib-target := $(obj)/lib.a --endif -- --ifneq ($(strip $(obj-y) $(obj-m) $(obj-n) $(obj-) $(lib-target)),) --builtin-target := $(obj)/built-in.o --endif -- --# We keep a list of all modules in $(MODVERDIR) -- --__build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \ -- $(if $(KBUILD_MODULES),$(obj-m)) \ -- $(subdir-ym) $(always) -- @: -- --# Linus' kernel sanity checking tool --ifneq ($(KBUILD_CHECKSRC),0) -- ifeq ($(KBUILD_CHECKSRC),2) -- quiet_cmd_force_checksrc = CHECK $< -- cmd_force_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ; -- else -- quiet_cmd_checksrc = CHECK $< -- cmd_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ; -- endif --endif -- -- --# Compile C sources (.c) --# --------------------------------------------------------------------------- -- --# Default is built-in, unless we know otherwise --modkern_cflags := $(CFLAGS_KERNEL) --quiet_modtag := $(empty) $(empty) -- --$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) --$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) --$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE) --$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) -- --$(real-objs-m) : quiet_modtag := [M] --$(real-objs-m:.o=.i) : quiet_modtag := [M] --$(real-objs-m:.o=.s) : quiet_modtag := [M] --$(real-objs-m:.o=.lst): quiet_modtag := [M] -- --$(obj-m) : quiet_modtag := [M] -- --# Default for not multi-part modules --modname = $(basetarget) -- --$(multi-objs-m) : modname = $(modname-multi) --$(multi-objs-m:.o=.i) : modname = $(modname-multi) --$(multi-objs-m:.o=.s) : modname = $(modname-multi) --$(multi-objs-m:.o=.lst) : modname = $(modname-multi) --$(multi-objs-y) : modname = $(modname-multi) --$(multi-objs-y:.o=.i) : modname = $(modname-multi) --$(multi-objs-y:.o=.s) : modname = $(modname-multi) --$(multi-objs-y:.o=.lst) : modname = $(modname-multi) -- --quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ --cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $< -- --$(obj)/%.s: $(src)/%.c FORCE -- $(call if_changed_dep,cc_s_c) -- --quiet_cmd_cc_i_c = CPP $(quiet_modtag) $@ --cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $< -- --$(obj)/%.i: $(src)/%.c FORCE -- $(call if_changed_dep,cc_i_c) -- --quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ --cmd_cc_symtypes_c = \ -- $(CPP) -D__GENKSYMS__ $(c_flags) $< \ -- | $(GENKSYMS) -T $@ >/dev/null; \ -- test -s $@ || rm -f $@ -- --$(obj)/%.symtypes : $(src)/%.c FORCE -- $(call if_changed_dep,cc_symtypes_c) -- --# C (.c) files --# The C file is compiled and updated dependency information is generated. --# (See cmd_cc_o_c + relevant part of rule_cc_o_c) -- --quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ -- --ifndef CONFIG_MODVERSIONS --cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -- --else --# When module versioning is enabled the following steps are executed: --# o compile a .tmp_.o from .c --# o if .tmp_.o doesn't contain a __ksymtab version, i.e. does --# not export symbols, we just rename .tmp_.o to .o and --# are done. --# o otherwise, we calculate symbol versions using the good old --# genksyms on the preprocessed source and postprocess them in a way --# that they are usable as a linker script --# o generate .o from .tmp_.o using the linker to --# replace the unresolved symbols __crc_exported_symbol with --# the actual value of the checksum generated by genksyms -- --cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< --cmd_modversions = \ -- if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ -- $(CPP) -D__GENKSYMS__ $(c_flags) $< \ -- | $(GENKSYMS) $(if $(KBUILD_SYMTYPES), \ -- -T $(@D)/$(@F:.o=.symtypes)) -a $(ARCH) \ -- > $(@D)/.tmp_$(@F:.o=.ver); \ -- \ -- $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ -- -T $(@D)/.tmp_$(@F:.o=.ver); \ -- rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \ -- else \ -- mv -f $(@D)/.tmp_$(@F) $@; \ -- fi; --endif -- --define rule_cc_o_c -- $(call echo-cmd,checksrc) $(cmd_checksrc) \ -- $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ -- $(cmd_modversions) \ -- scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \ -- $(dot-target).tmp; \ -- rm -f $(depfile); \ -- mv -f $(dot-target).tmp $(dot-target).cmd --endef -- --# Built-in and composite module parts --$(obj)/%.o: $(src)/%.c FORCE -- $(call cmd,force_checksrc) -- $(call if_changed_rule,cc_o_c) -- --# Single-part modules are special since we need to mark them in $(MODVERDIR) -- --$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE -- $(call cmd,force_checksrc) -- $(call if_changed_rule,cc_o_c) -- @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod) -- --quiet_cmd_cc_lst_c = MKLST $@ -- cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ -- $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \ -- System.map $(OBJDUMP) > $@ -- --$(obj)/%.lst: $(src)/%.c FORCE -- $(call if_changed_dep,cc_lst_c) -- --# Compile assembler sources (.S) --# --------------------------------------------------------------------------- -- --modkern_aflags := $(AFLAGS_KERNEL) -- --$(real-objs-m) : modkern_aflags := $(AFLAGS_MODULE) --$(real-objs-m:.o=.s): modkern_aflags := $(AFLAGS_MODULE) -- --quiet_cmd_as_s_S = CPP $(quiet_modtag) $@ --cmd_as_s_S = $(CPP) $(a_flags) -o $@ $< -- --$(obj)/%.s: $(src)/%.S FORCE -- $(call if_changed_dep,as_s_S) -- --quiet_cmd_as_o_S = AS $(quiet_modtag) $@ --cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< -- --$(obj)/%.o: $(src)/%.S FORCE -- $(call if_changed_dep,as_o_S) -- --targets += $(real-objs-y) $(real-objs-m) $(lib-y) --targets += $(extra-y) $(MAKECMDGOALS) $(always) -- --# Linker scripts preprocessor (.lds.S -> .lds) --# --------------------------------------------------------------------------- --quiet_cmd_cpp_lds_S = LDS $@ -- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $< -- --$(obj)/%.lds: $(src)/%.lds.S FORCE -- $(call if_changed_dep,cpp_lds_S) -- --# Build the compiled-in targets --# --------------------------------------------------------------------------- -- --# To build objects in subdirs, we need to descend into the directories --$(sort $(subdir-obj-y)): $(subdir-ym) ; -- --# --# Rule to compile a set of .o files into one .o file --# --ifdef builtin-target --quiet_cmd_link_o_target = LD $@ --# If the list of objects to link is empty, just create an empty built-in.o --cmd_link_o_target = $(if $(strip $(obj-y)),\ -- $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^),\ -- rm -f $@; $(AR) rcs $@) -- --$(builtin-target): $(obj-y) FORCE -- $(call if_changed,link_o_target) -- --targets += $(builtin-target) --endif # builtin-target -- --# --# Rule to compile a set of .o files into one .a file --# --ifdef lib-target --quiet_cmd_link_l_target = AR $@ --cmd_link_l_target = rm -f $@; $(AR) $(EXTRA_ARFLAGS) rcs $@ $(lib-y) -- --$(lib-target): $(lib-y) FORCE -- $(call if_changed,link_l_target) -- --targets += $(lib-target) --endif -- --# --# Rule to link composite objects --# --# Composite objects are specified in kbuild makefile as follows: --# -objs := --# or --# -y := --link_multi_deps = \ --$(filter $(addprefix $(obj)/, \ --$($(subst $(obj)/,,$(@:.o=-objs))) \ --$($(subst $(obj)/,,$(@:.o=-y)))), $^) -- --quiet_cmd_link_multi-y = LD $@ --cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) -- --quiet_cmd_link_multi-m = LD [M] $@ --cmd_link_multi-m = $(LD) $(ld_flags) $(LDFLAGS_MODULE) -o $@ $(link_multi_deps) -- --# We would rather have a list of rules like --# foo.o: $(foo-objs) --# but that's not so easy, so we rather make all composite objects depend --# on the set of all their parts --$(multi-used-y) : %.o: $(multi-objs-y) FORCE -- $(call if_changed,link_multi-y) -- --$(multi-used-m) : %.o: $(multi-objs-m) FORCE -- $(call if_changed,link_multi-m) -- @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod) -- --targets += $(multi-used-y) $(multi-used-m) -- -- --# Descending --# --------------------------------------------------------------------------- -- --PHONY += $(subdir-ym) --$(subdir-ym): -- $(Q)$(MAKE) $(build)=$@ -- --# Add FORCE to the prequisites of a target to force it to be always rebuilt. --# --------------------------------------------------------------------------- -- --PHONY += FORCE -- --FORCE: -- --# Read all saved command lines and dependencies for the $(targets) we --# may be building above, using $(if_changed{,_dep}). As an --# optimization, we don't need to read them if the target does not --# exist, we will rebuild anyway in that case. -- --targets := $(wildcard $(sort $(targets))) --cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) -- --ifneq ($(cmd_files),) -- include $(cmd_files) --endif -- -- --# Declare the contents of the .PHONY variable as phony. We keep that --# information in a variable se we can use it in if_changed and friends. -- --.PHONY: $(PHONY) -diff -Nurb linux-2.6.22-570/scripts/Makefile.modpost.orig linux-2.6.22-590/scripts/Makefile.modpost.orig ---- linux-2.6.22-570/scripts/Makefile.modpost.orig 2007-07-08 19:32:17.000000000 -0400 -+++ linux-2.6.22-590/scripts/Makefile.modpost.orig 1969-12-31 19:00:00.000000000 -0500 -@@ -1,132 +0,0 @@ --# =========================================================================== --# Module versions --# =========================================================================== --# --# Stage one of module building created the following: --# a) The individual .o files used for the module --# b) A .o file which is the .o files above linked together --# c) A .mod file in $(MODVERDIR)/, listing the name of the --# the preliminary .o file, plus all .o files -- --# Stage 2 is handled by this file and does the following --# 1) Find all modules from the files listed in $(MODVERDIR)/ --# 2) modpost is then used to --# 3) create one .mod.c file pr. module --# 4) create one Module.symvers file with CRC for all exported symbols --# 5) compile all .mod.c files --# 6) final link of the module to a file -- --# Step 3 is used to place certain information in the module's ELF --# section, including information such as: --# Version magic (see include/vermagic.h for full details) --# - Kernel release --# - SMP is CONFIG_SMP --# - PREEMPT is CONFIG_PREEMPT --# - GCC Version --# Module info --# - Module version (MODULE_VERSION) --# - Module alias'es (MODULE_ALIAS) --# - Module license (MODULE_LICENSE) --# - See include/linux/module.h for more details -- --# Step 4 is solely used to allow module versioning in external modules, --# where the CRC of each module is retrieved from the Module.symers file. -- --# KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined --# symbols in the final module linking stage --# KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules. --# This is solely usefull to speed up test compiles --PHONY := _modpost --_modpost: __modpost -- --include include/config/auto.conf --include scripts/Kbuild.include --include scripts/Makefile.lib -- --kernelsymfile := $(objtree)/Module.symvers --modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers -- --# Step 1), find all modules listed in $(MODVERDIR)/ --__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) --modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) -- --# Stop after building .o files if NOFINAL is set. Makes compile tests quicker --_modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) -- -- --# Step 2), invoke modpost --# Includes step 3,4 --quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules -- cmd_modpost = scripts/mod/modpost \ -- $(if $(CONFIG_MODVERSIONS),-m) \ -- $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \ -- $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ -- $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ -- $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ -- $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) -- --PHONY += __modpost --__modpost: $(modules:.ko=.o) FORCE -- $(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^) -- --quiet_cmd_kernel-mod = MODPOST $@ -- cmd_kernel-mod = $(cmd_modpost) $(KBUILD_VMLINUX_OBJS) -- --PHONY += vmlinux --vmlinux: FORCE -- $(call cmd,kernel-mod) -- --# Declare generated files as targets for modpost --$(symverfile): __modpost ; --$(modules:.ko=.mod.c): __modpost ; -- -- --# Step 5), compile all *.mod.c files -- --# modname is set to make c_flags define KBUILD_MODNAME --modname = $(notdir $(@:.mod.o=)) -- --quiet_cmd_cc_o_c = CC $@ -- cmd_cc_o_c = $(CC) $(c_flags) $(CFLAGS_MODULE) \ -- -c -o $@ $< -- --$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE -- $(call if_changed_dep,cc_o_c) -- --targets += $(modules:.ko=.mod.o) -- --# Step 6), final link of the modules --quiet_cmd_ld_ko_o = LD [M] $@ -- cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_MODULE) -o $@ \ -- $(filter-out FORCE,$^) -- --$(modules): %.ko :%.o %.mod.o FORCE -- $(call if_changed,ld_ko_o) -- --targets += $(modules) -- -- --# Add FORCE to the prequisites of a target to force it to be always rebuilt. --# --------------------------------------------------------------------------- -- --PHONY += FORCE -- --FORCE: -- --# Read all saved command lines and dependencies for the $(targets) we --# may be building above, using $(if_changed{,_dep}). As an --# optimization, we don't need to read them if the target does not --# exist, we will rebuild anyway in that case. -- --targets := $(wildcard $(sort $(targets))) --cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) -- --ifneq ($(cmd_files),) -- include $(cmd_files) --endif -- -- --# Declare the contents of the .PHONY variable as phony. We keep that --# information in a variable se we can use it in if_changed and friends. -- --.PHONY: $(PHONY) diff -Nurb linux-2.6.22-570/security/commoncap.c linux-2.6.22-590/security/commoncap.c --- linux-2.6.22-570/security/commoncap.c 2008-03-20 13:25:46.000000000 -0400 +++ linux-2.6.22-590/security/commoncap.c 2008-03-20 13:28:08.000000000 -0400 -- 2.43.0