ifdef CONFIG_DEBUG_INFO
CFLAGS += -g
endif
-diff -Nurb linux-2.6.22-570/Makefile.orig linux-2.6.22-590/Makefile.orig
---- linux-2.6.22-570/Makefile.orig 2008-03-20 13:25:40.000000000 -0400
-+++ linux-2.6.22-590/Makefile.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,1493 +0,0 @@
--VERSION = 2
--PATCHLEVEL = 6
--SUBLEVEL = 22
--EXTRAVERSION = .14
--NAME = Holy Dancing Manatees, Batman!
--
--# *DOCUMENTATION*
--# To see a list of typical targets execute "make help"
--# More info can be located in ./README
--# Comments in this file are targeted only to the developer, do not
--# expect to learn how to build the kernel reading this file.
--
--# Do not:
--# o use make's built-in rules and variables
--# (this increases performance and avoid hard-to-debug behavour);
--# o print "Entering directory ...";
--MAKEFLAGS += -rR --no-print-directory
--
--# We are using a recursive build, so we need to do a little thinking
--# to get the ordering right.
--#
--# Most importantly: sub-Makefiles should only ever modify files in
--# their own directory. If in some directory we have a dependency on
--# a file in another dir (which doesn't happen often, but it's often
--# unavoidable when linking the built-in.o targets which finally
--# turn into vmlinux), we will call a sub make in that other dir, and
--# after that we are sure that everything which is in that other dir
--# is now up to date.
--#
--# The only cases where we need to modify files which have global
--# effects are thus separated out and done before the recursive
--# descending is started. They are now explicitly listed as the
--# prepare rule.
--
--# To put more focus on warnings, be less verbose as default
--# Use 'make V=1' to see the full commands
--
--ifdef V
-- ifeq ("$(origin V)", "command line")
-- KBUILD_VERBOSE = $(V)
-- endif
--endif
--ifndef KBUILD_VERBOSE
-- KBUILD_VERBOSE = 0
--endif
--
--# Call a source code checker (by default, "sparse") as part of the
--# C compilation.
--#
--# Use 'make C=1' to enable checking of only re-compiled files.
--# Use 'make C=2' to enable checking of *all* source files, regardless
--# of whether they are re-compiled or not.
--#
--# See the file "Documentation/sparse.txt" for more details, including
--# where to get the "sparse" utility.
--
--ifdef C
-- ifeq ("$(origin C)", "command line")
-- KBUILD_CHECKSRC = $(C)
-- endif
--endif
--ifndef KBUILD_CHECKSRC
-- KBUILD_CHECKSRC = 0
--endif
--
--# Use make M=dir to specify directory of external module to build
--# Old syntax make ... SUBDIRS=$PWD is still supported
--# Setting the environment variable KBUILD_EXTMOD take precedence
--ifdef SUBDIRS
-- KBUILD_EXTMOD ?= $(SUBDIRS)
--endif
--ifdef M
-- ifeq ("$(origin M)", "command line")
-- KBUILD_EXTMOD := $(M)
-- endif
--endif
--
--
--# kbuild supports saving output files in a separate directory.
--# To locate output files in a separate directory two syntaxes are supported.
--# In both cases the working directory must be the root of the kernel src.
--# 1) O=
--# Use "make O=dir/to/store/output/files/"
--#
--# 2) Set KBUILD_OUTPUT
--# Set the environment variable KBUILD_OUTPUT to point to the directory
--# where the output files shall be placed.
--# export KBUILD_OUTPUT=dir/to/store/output/files/
--# make
--#
--# The O= assignment takes precedence over the KBUILD_OUTPUT environment
--# variable.
--
--
--# KBUILD_SRC is set on invocation of make in OBJ directory
--# KBUILD_SRC is not intended to be used by the regular user (for now)
--ifeq ($(KBUILD_SRC),)
--
--# OK, Make called in directory where kernel src resides
--# Do we want to locate output files in a separate directory?
--ifdef O
-- ifeq ("$(origin O)", "command line")
-- KBUILD_OUTPUT := $(O)
-- endif
--endif
--
--# That's our default target when none is given on the command line
--PHONY := _all
--_all:
--
--ifneq ($(KBUILD_OUTPUT),)
--# Invoke a second make in the output directory, passing relevant variables
--# check that the output directory actually exists
--saved-output := $(KBUILD_OUTPUT)
--KBUILD_OUTPUT := $(shell cd $(KBUILD_OUTPUT) && /bin/pwd)
--$(if $(KBUILD_OUTPUT),, \
-- $(error output directory "$(saved-output)" does not exist))
--
--PHONY += $(MAKECMDGOALS)
--
--$(filter-out _all,$(MAKECMDGOALS)) _all:
-- $(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \
-- KBUILD_SRC=$(CURDIR) \
-- KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile $@
--
--# Leave processing to above invocation of make
--skip-makefile := 1
--endif # ifneq ($(KBUILD_OUTPUT),)
--endif # ifeq ($(KBUILD_SRC),)
--
--# We process the rest of the Makefile if this is the final invocation of make
--ifeq ($(skip-makefile),)
--
--# If building an external module we do not care about the all: rule
--# but instead _all depend on modules
--PHONY += all
--ifeq ($(KBUILD_EXTMOD),)
--_all: all
--else
--_all: modules
--endif
--
--srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR))
--TOPDIR := $(srctree)
--# FIXME - TOPDIR is obsolete, use srctree/objtree
--objtree := $(CURDIR)
--src := $(srctree)
--obj := $(objtree)
--
--VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))
--
--export srctree objtree VPATH TOPDIR
--
--
--# SUBARCH tells the usermode build what the underlying arch is. That is set
--# first, and if a usermode build is happening, the "ARCH=um" on the command
--# line overrides the setting of ARCH below. If a native build is happening,
--# then ARCH is assigned, getting whatever value it gets normally, and
--# SUBARCH is subsequently ignored.
--
--SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
-- -e s/arm.*/arm/ -e s/sa110/arm/ \
-- -e s/s390x/s390/ -e s/parisc64/parisc/ \
-- -e s/ppc.*/powerpc/ -e s/mips.*/mips/ )
--
--# Cross compiling and selecting different set of gcc/bin-utils
--# ---------------------------------------------------------------------------
--#
--# When performing cross compilation for other architectures ARCH shall be set
--# to the target architecture. (See arch/* for the possibilities).
--# ARCH can be set during invocation of make:
--# make ARCH=ia64
--# Another way is to have ARCH set in the environment.
--# The default ARCH is the host where make is executed.
--
--# CROSS_COMPILE specify the prefix used for all executables used
--# during compilation. Only gcc and related bin-utils executables
--# are prefixed with $(CROSS_COMPILE).
--# CROSS_COMPILE can be set on the command line
--# make CROSS_COMPILE=ia64-linux-
--# Alternatively CROSS_COMPILE can be set in the environment.
--# Default value for CROSS_COMPILE is not to prefix executables
--# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
--
--ARCH ?= $(SUBARCH)
--CROSS_COMPILE ?=
--
--# Architecture as present in compile.h
--UTS_MACHINE := $(ARCH)
--
--KCONFIG_CONFIG ?= .config
--
--# SHELL used by kbuild
--CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
-- else if [ -x /bin/bash ]; then echo /bin/bash; \
-- else echo sh; fi ; fi)
--
--HOSTCC = gcc
--HOSTCXX = g++
--HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
--HOSTCXXFLAGS = -O2
--
--# Decide whether to build built-in, modular, or both.
--# Normally, just do built-in.
--
--KBUILD_MODULES :=
--KBUILD_BUILTIN := 1
--
--# If we have only "make modules", don't compile built-in objects.
--# When we're building modules with modversions, we need to consider
--# the built-in objects during the descend as well, in order to
--# make sure the checksums are up to date before we record them.
--
--ifeq ($(MAKECMDGOALS),modules)
-- KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1)
--endif
--
--# If we have "make <whatever> modules", compile modules
--# in addition to whatever we do anyway.
--# Just "make" or "make all" shall build modules as well
--
--ifneq ($(filter all _all modules,$(MAKECMDGOALS)),)
-- KBUILD_MODULES := 1
--endif
--
--ifeq ($(MAKECMDGOALS),)
-- KBUILD_MODULES := 1
--endif
--
--export KBUILD_MODULES KBUILD_BUILTIN
--export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD
--
--# Beautify output
--# ---------------------------------------------------------------------------
--#
--# Normally, we echo the whole command before executing it. By making
--# that echo $($(quiet)$(cmd)), we now have the possibility to set
--# $(quiet) to choose other forms of output instead, e.g.
--#
--# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@
--# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
--#
--# If $(quiet) is empty, the whole command will be printed.
--# If it is set to "quiet_", only the short version will be printed.
--# If it is set to "silent_", nothing will be printed at all, since
--# the variable $(silent_cmd_cc_o_c) doesn't exist.
--#
--# A simple variant is to prefix commands with $(Q) - that's useful
--# for commands that shall be hidden in non-verbose mode.
--#
--# $(Q)ln $@ :<
--#
--# If KBUILD_VERBOSE equals 0 then the above command will be hidden.
--# If KBUILD_VERBOSE equals 1 then the above command is displayed.
--
--ifeq ($(KBUILD_VERBOSE),1)
-- quiet =
-- Q =
--else
-- quiet=quiet_
-- Q = @
--endif
--
--# If the user is running make -s (silent mode), suppress echoing of
--# commands
--
--ifneq ($(findstring s,$(MAKEFLAGS)),)
-- quiet=silent_
--endif
--
--export quiet Q KBUILD_VERBOSE
--
--
--# Look for make include files relative to root of kernel src
--MAKEFLAGS += --include-dir=$(srctree)
--
--# We need some generic definitions.
--include $(srctree)/scripts/Kbuild.include
--
--# Make variables (CC, etc...)
--
--AS = $(CROSS_COMPILE)as
--LD = $(CROSS_COMPILE)ld
--CC = $(CROSS_COMPILE)gcc
--CPP = $(CC) -E
--AR = $(CROSS_COMPILE)ar
--NM = $(CROSS_COMPILE)nm
--STRIP = $(CROSS_COMPILE)strip
--OBJCOPY = $(CROSS_COMPILE)objcopy
--OBJDUMP = $(CROSS_COMPILE)objdump
--AWK = awk
--GENKSYMS = scripts/genksyms/genksyms
--DEPMOD = /sbin/depmod
--KALLSYMS = scripts/kallsyms
--PERL = perl
--CHECK = sparse
--
--CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF)
--MODFLAGS = -DMODULE
--CFLAGS_MODULE = $(MODFLAGS)
--AFLAGS_MODULE = $(MODFLAGS)
--LDFLAGS_MODULE = -r
--CFLAGS_KERNEL =
--AFLAGS_KERNEL =
--
--
--# Use LINUXINCLUDE when you must reference the include/ directory.
--# Needed to be compatible with the O= option
--LINUXINCLUDE := -Iinclude \
-- $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \
-- -include include/linux/autoconf.h
--
--CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE)
--
--CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-- -fno-strict-aliasing -fno-common
--AFLAGS := -D__ASSEMBLY__
--
--# Read KERNELRELEASE from include/config/kernel.release (if it exists)
--KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
--KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
--
--export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
--export ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
--export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE
--export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
--
--export CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
--export CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
--export AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
--
--# When compiling out-of-tree modules, put MODVERDIR in the module
--# tree rather than in the kernel tree. The kernel tree might
--# even be read-only.
--export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions
--
--# Files to ignore in find ... statements
--
--RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o
--export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exclude CVS --exclude .pc --exclude .hg --exclude .git
--
--# ===========================================================================
--# Rules shared between *config targets and build targets
--
--# Basic helpers built in scripts/
--PHONY += scripts_basic
--scripts_basic:
-- $(Q)$(MAKE) $(build)=scripts/basic
--
--# To avoid any implicit rule to kick in, define an empty command.
--scripts/basic/%: scripts_basic ;
--
--PHONY += outputmakefile
--# outputmakefile generates a Makefile in the output directory, if using a
--# separate output directory. This allows convenient use of make in the
--# output directory.
--outputmakefile:
--ifneq ($(KBUILD_SRC),)
-- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile \
-- $(srctree) $(objtree) $(VERSION) $(PATCHLEVEL)
--endif
--
--# To make sure we do not include .config for any of the *config targets
--# catch them early, and hand them over to scripts/kconfig/Makefile
--# It is allowed to specify more targets when calling make, including
--# mixing *config targets and build targets.
--# For example 'make oldconfig all'.
--# Detect when mixed targets is specified, and make a second invocation
--# of make so .config is not included in this case either (for *config).
--
--no-dot-config-targets := clean mrproper distclean \
-- cscope TAGS tags help %docs check% \
-- include/linux/version.h headers_% \
-- kernelrelease kernelversion
--
--config-targets := 0
--mixed-targets := 0
--dot-config := 1
--
--ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),)
-- ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),)
-- dot-config := 0
-- endif
--endif
--
--ifeq ($(KBUILD_EXTMOD),)
-- ifneq ($(filter config %config,$(MAKECMDGOALS)),)
-- config-targets := 1
-- ifneq ($(filter-out config %config,$(MAKECMDGOALS)),)
-- mixed-targets := 1
-- endif
-- endif
--endif
--
--ifeq ($(mixed-targets),1)
--# ===========================================================================
--# We're called with mixed targets (*config and build targets).
--# Handle them one by one.
--
--%:: FORCE
-- $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@
--
--else
--ifeq ($(config-targets),1)
--# ===========================================================================
--# *config targets only - make sure prerequisites are updated, and descend
--# in scripts/kconfig to make the *config target
--
--# Read arch specific Makefile to set KBUILD_DEFCONFIG as needed.
--# KBUILD_DEFCONFIG may point out an alternative default configuration
--# used for 'make defconfig'
--include $(srctree)/arch/$(ARCH)/Makefile
--export KBUILD_DEFCONFIG
--
--config %config: scripts_basic outputmakefile FORCE
-- $(Q)mkdir -p include/linux include/config
-- $(Q)$(MAKE) $(build)=scripts/kconfig $@
--
--else
--# ===========================================================================
--# Build targets only - this includes vmlinux, arch specific targets, clean
--# targets and others. In general all targets except *config targets.
--
--ifeq ($(KBUILD_EXTMOD),)
--# Additional helpers built in scripts/
--# Carefully list dependencies so we do not try to build scripts twice
--# in parallel
--PHONY += scripts
--scripts: scripts_basic include/config/auto.conf
-- $(Q)$(MAKE) $(build)=$(@)
--
--# Objects we will link into vmlinux / subdirs we need to visit
--init-y := init/
--drivers-y := drivers/ sound/
--net-y := net/
--libs-y := lib/
--core-y := usr/
--endif # KBUILD_EXTMOD
--
--ifeq ($(dot-config),1)
--# Read in config
---include include/config/auto.conf
--
--ifeq ($(KBUILD_EXTMOD),)
--# Read in dependencies to all Kconfig* files, make sure to run
--# oldconfig if changes are detected.
---include include/config/auto.conf.cmd
--
--# To avoid any implicit rule to kick in, define an empty command
--$(KCONFIG_CONFIG) include/config/auto.conf.cmd: ;
--
--# If .config is newer than include/config/auto.conf, someone tinkered
--# with it and forgot to run make oldconfig.
--# if auto.conf.cmd is missing then we are probably in a cleaned tree so
--# we execute the config step to be sure to catch updated Kconfig files
--include/config/auto.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd
-- $(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig
--else
--# external modules needs include/linux/autoconf.h and include/config/auto.conf
--# but do not care if they are up-to-date. Use auto.conf to trigger the test
--PHONY += include/config/auto.conf
--
--include/config/auto.conf:
-- $(Q)test -e include/linux/autoconf.h -a -e $@ || ( \
-- echo; \
-- echo " ERROR: Kernel configuration is invalid."; \
-- echo " include/linux/autoconf.h or $@ are missing."; \
-- echo " Run 'make oldconfig && make prepare' on kernel src to fix it."; \
-- echo; \
-- /bin/false)
--
--endif # KBUILD_EXTMOD
--
--else
--# Dummy target needed, because used as prerequisite
--include/config/auto.conf: ;
--endif # $(dot-config)
--
--# The all: target is the default when no target is given on the
--# command line.
--# This allow a user to issue only 'make' to build a kernel including modules
--# Defaults vmlinux but it is usually overridden in the arch makefile
--all: vmlinux
--
--ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
--CFLAGS += -Os
--else
--CFLAGS += -O2
--endif
--
--include $(srctree)/arch/$(ARCH)/Makefile
--
--ifdef CONFIG_FRAME_POINTER
--CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
--else
--CFLAGS += -fomit-frame-pointer
--endif
--
--ifdef CONFIG_DEBUG_INFO
--CFLAGS += -g
--endif
--
--# Force gcc to behave correct even for buggy distributions
--CFLAGS += $(call cc-option, -fno-stack-protector)
--
--# arch Makefile may override CC so keep this after arch Makefile is included
--NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
--CHECKFLAGS += $(NOSTDINC_FLAGS)
--
--# warn about C99 declaration after statement
--CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
--
--# disable pointer signed / unsigned warnings in gcc 4.0
--CFLAGS += $(call cc-option,-Wno-pointer-sign,)
--
--# Default kernel image to build when no specific target is given.
--# KBUILD_IMAGE may be overruled on the command line or
--# set in the environment
--# Also any assignments in arch/$(ARCH)/Makefile take precedence over
--# this default value
--export KBUILD_IMAGE ?= vmlinux
--
--#
--# INSTALL_PATH specifies where to place the updated kernel and system map
--# images. Default is /boot, but you can set it to other values
--export INSTALL_PATH ?= /boot
--
--#
--# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
--# relocations required by build roots. This is not defined in the
--# makefile but the argument can be passed to make if needed.
--#
--
--MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
--export MODLIB
--
--#
--# INSTALL_MOD_STRIP, if defined, will cause modules to be
--# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then
--# the default option --strip-debug will be used. Otherwise,
--# INSTALL_MOD_STRIP will used as the options to the strip command.
--
--ifdef INSTALL_MOD_STRIP
--ifeq ($(INSTALL_MOD_STRIP),1)
--mod_strip_cmd = $(STRIP) --strip-debug
--else
--mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP)
--endif # INSTALL_MOD_STRIP=1
--else
--mod_strip_cmd = true
--endif # INSTALL_MOD_STRIP
--export mod_strip_cmd
--
--
--ifeq ($(KBUILD_EXTMOD),)
--core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
--
--vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
-- $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
-- $(net-y) $(net-m) $(libs-y) $(libs-m)))
--
--vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
-- $(init-n) $(init-) \
-- $(core-n) $(core-) $(drivers-n) $(drivers-) \
-- $(net-n) $(net-) $(libs-n) $(libs-))))
--
--init-y := $(patsubst %/, %/built-in.o, $(init-y))
--core-y := $(patsubst %/, %/built-in.o, $(core-y))
--drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y))
--net-y := $(patsubst %/, %/built-in.o, $(net-y))
--libs-y1 := $(patsubst %/, %/lib.a, $(libs-y))
--libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y))
--libs-y := $(libs-y1) $(libs-y2)
--
--# Build vmlinux
--# ---------------------------------------------------------------------------
--# vmlinux is built from the objects selected by $(vmlinux-init) and
--# $(vmlinux-main). Most are built-in.o files from top-level directories
--# in the kernel tree, others are specified in arch/$(ARCH)/Makefile.
--# Ordering when linking is important, and $(vmlinux-init) must be first.
--#
--# vmlinux
--# ^
--# |
--# +-< $(vmlinux-init)
--# | +--< init/version.o + more
--# |
--# +--< $(vmlinux-main)
--# | +--< driver/built-in.o mm/built-in.o + more
--# |
--# +-< kallsyms.o (see description in CONFIG_KALLSYMS section)
--#
--# vmlinux version (uname -v) cannot be updated during normal
--# descending-into-subdirs phase since we do not yet know if we need to
--# update vmlinux.
--# Therefore this step is delayed until just before final link of vmlinux -
--# except in the kallsyms case where it is done just before adding the
--# symbols to the kernel.
--#
--# System.map is generated to document addresses of all kernel symbols
--
--vmlinux-init := $(head-y) $(init-y)
--vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y)
--vmlinux-all := $(vmlinux-init) $(vmlinux-main)
--vmlinux-lds := arch/$(ARCH)/kernel/vmlinux.lds
--export KBUILD_VMLINUX_OBJS := $(vmlinux-all)
--
--# Rule to link vmlinux - also used during CONFIG_KALLSYMS
--# May be overridden by arch/$(ARCH)/Makefile
--quiet_cmd_vmlinux__ ?= LD $@
-- cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
-- -T $(vmlinux-lds) $(vmlinux-init) \
-- --start-group $(vmlinux-main) --end-group \
-- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
--
--# Generate new vmlinux version
--quiet_cmd_vmlinux_version = GEN .version
-- cmd_vmlinux_version = set -e; \
-- if [ ! -r .version ]; then \
-- rm -f .version; \
-- echo 1 >.version; \
-- else \
-- mv .version .old_version; \
-- expr 0$$(cat .old_version) + 1 >.version; \
-- fi; \
-- $(MAKE) $(build)=init
--
--# Generate System.map
--quiet_cmd_sysmap = SYSMAP
-- cmd_sysmap = $(CONFIG_SHELL) $(srctree)/scripts/mksysmap
--
--# Link of vmlinux
--# If CONFIG_KALLSYMS is set .version is already updated
--# Generate System.map and verify that the content is consistent
--# Use + in front of the vmlinux_version rule to silent warning with make -j2
--# First command is ':' to allow us to use + in front of the rule
--define rule_vmlinux__
-- :
-- $(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version))
--
-- $(call cmd,vmlinux__)
-- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd
--
-- $(Q)$(if $($(quiet)cmd_sysmap), \
-- echo ' $($(quiet)cmd_sysmap) System.map' &&) \
-- $(cmd_sysmap) $@ System.map; \
-- if [ $$? -ne 0 ]; then \
-- rm -f $@; \
-- /bin/false; \
-- fi;
-- $(verify_kallsyms)
--endef
--
--
--ifdef CONFIG_KALLSYMS
--# Generate section listing all symbols and add it into vmlinux $(kallsyms.o)
--# It's a three stage process:
--# o .tmp_vmlinux1 has all symbols and sections, but __kallsyms is
--# empty
--# Running kallsyms on that gives us .tmp_kallsyms1.o with
--# the right size - vmlinux version (uname -v) is updated during this step
--# o .tmp_vmlinux2 now has a __kallsyms section of the right size,
--# but due to the added section, some addresses have shifted.
--# From here, we generate a correct .tmp_kallsyms2.o
--# o The correct .tmp_kallsyms2.o is linked into the final vmlinux.
--# o Verify that the System.map from vmlinux matches the map from
--# .tmp_vmlinux2, just in case we did not generate kallsyms correctly.
--# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using
--# .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a
--# temporary bypass to allow the kernel to be built while the
--# maintainers work out what went wrong with kallsyms.
--
--ifdef CONFIG_KALLSYMS_EXTRA_PASS
--last_kallsyms := 3
--else
--last_kallsyms := 2
--endif
--
--kallsyms.o := .tmp_kallsyms$(last_kallsyms).o
--
--define verify_kallsyms
-- $(Q)$(if $($(quiet)cmd_sysmap), \
-- echo ' $($(quiet)cmd_sysmap) .tmp_System.map' &&) \
-- $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map
-- $(Q)cmp -s System.map .tmp_System.map || \
-- (echo Inconsistent kallsyms data; \
-- echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \
-- rm .tmp_kallsyms* ; /bin/false )
--endef
--
--# Update vmlinux version before link
--# Use + in front of this rule to silent warning about make -j1
--# First command is ':' to allow us to use + in front of this rule
--cmd_ksym_ld = $(cmd_vmlinux__)
--define rule_ksym_ld
-- :
-- +$(call cmd,vmlinux_version)
-- $(call cmd,vmlinux__)
-- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd
--endef
--
--# Generate .S file with all kernel symbols
--quiet_cmd_kallsyms = KSYM $@
-- cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \
-- $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@
--
--.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE
-- $(call if_changed_dep,as_o_S)
--
--.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS)
-- $(call cmd,kallsyms)
--
--# .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version
--.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE
-- $(call if_changed_rule,ksym_ld)
--
--.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE
-- $(call if_changed,vmlinux__)
--
--.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE
-- $(call if_changed,vmlinux__)
--
--# Needs to visit scripts/ before $(KALLSYMS) can be used.
--$(KALLSYMS): scripts ;
--
--# Generate some data for debugging strange kallsyms problems
--debug_kallsyms: .tmp_map$(last_kallsyms)
--
--.tmp_map%: .tmp_vmlinux% FORCE
-- ($(OBJDUMP) -h $< | $(AWK) '/^ +[0-9]/{print $$4 " 0 " $$2}'; $(NM) $<) | sort > $@
--
--.tmp_map3: .tmp_map2
--
--.tmp_map2: .tmp_map1
--
--endif # ifdef CONFIG_KALLSYMS
--
--# vmlinux image - including updated kernel symbols
--vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
--ifdef CONFIG_HEADERS_CHECK
-- $(Q)$(MAKE) -f $(srctree)/Makefile headers_check
--endif
-- $(call if_changed_rule,vmlinux__)
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
-- $(Q)rm -f .old_version
--
--# The actual objects are generated when descending,
--# make sure no implicit rule kicks in
--$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
--
--# Handle descending into subdirectories listed in $(vmlinux-dirs)
--# Preset locale variables to speed up the build process. Limit locale
--# tweaks to this spot to avoid wrong language settings when running
--# make menuconfig etc.
--# Error messages still appears in the original language
--
--PHONY += $(vmlinux-dirs)
--$(vmlinux-dirs): prepare scripts
-- $(Q)$(MAKE) $(build)=$@
--
--# Build the kernel release string
--#
--# The KERNELRELEASE value built here is stored in the file
--# include/config/kernel.release, and is used when executing several
--# make targets, such as "make install" or "make modules_install."
--#
--# The eventual kernel release string consists of the following fields,
--# shown in a hierarchical format to show how smaller parts are concatenated
--# to form the larger and final value, with values coming from places like
--# the Makefile, kernel config options, make command line options and/or
--# SCM tag information.
--#
--# $(KERNELVERSION)
--# $(VERSION) eg, 2
--# $(PATCHLEVEL) eg, 6
--# $(SUBLEVEL) eg, 18
--# $(EXTRAVERSION) eg, -rc6
--# $(localver-full)
--# $(localver)
--# localversion* (files without backups, containing '~')
--# $(CONFIG_LOCALVERSION) (from kernel config setting)
--# $(localver-auto) (only if CONFIG_LOCALVERSION_AUTO is set)
--# ./scripts/setlocalversion (SCM tag, if one exists)
--# $(LOCALVERSION) (from make command line if provided)
--#
--# Note how the final $(localver-auto) string is included *only* if the
--# kernel config option CONFIG_LOCALVERSION_AUTO is selected. Also, at the
--# moment, only git is supported but other SCMs can edit the script
--# scripts/setlocalversion and add the appropriate checks as needed.
--
--pattern = ".*/localversion[^~]*"
--string = $(shell cat /dev/null \
-- `find $(objtree) $(srctree) -maxdepth 1 -regex $(pattern) | sort -u`)
--
--localver = $(subst $(space),, $(string) \
-- $(patsubst "%",%,$(CONFIG_LOCALVERSION)))
--
--# If CONFIG_LOCALVERSION_AUTO is set scripts/setlocalversion is called
--# and if the SCM is know a tag from the SCM is appended.
--# The appended tag is determined by the SCM used.
--#
--# Currently, only git is supported.
--# Other SCMs can edit scripts/setlocalversion and add the appropriate
--# checks as needed.
--ifdef CONFIG_LOCALVERSION_AUTO
-- _localver-auto = $(shell $(CONFIG_SHELL) \
-- $(srctree)/scripts/setlocalversion $(srctree))
-- localver-auto = $(LOCALVERSION)$(_localver-auto)
--endif
--
--localver-full = $(localver)$(localver-auto)
--
--# Store (new) KERNELRELASE string in include/config/kernel.release
--kernelrelease = $(KERNELVERSION)$(localver-full)
--include/config/kernel.release: include/config/auto.conf FORCE
-- $(Q)rm -f $@
-- $(Q)echo $(kernelrelease) > $@
--
--
--# Things we need to do before we recursively start building the kernel
--# or the modules are listed in "prepare".
--# A multi level approach is used. prepareN is processed before prepareN-1.
--# archprepare is used in arch Makefiles and when processed asm symlink,
--# version.h and scripts_basic is processed / created.
--
--# Listed in dependency order
--PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
--
--# prepare3 is used to check if we are building in a separate output directory,
--# and if so do:
--# 1) Check that make has not been executed in the kernel src $(srctree)
--# 2) Create the include2 directory, used for the second asm symlink
--prepare3: include/config/kernel.release
--ifneq ($(KBUILD_SRC),)
-- @echo ' Using $(srctree) as source for kernel'
-- $(Q)if [ -f $(srctree)/.config -o -d $(srctree)/include/config ]; then \
-- echo " $(srctree) is not clean, please run 'make mrproper'";\
-- echo " in the '$(srctree)' directory.";\
-- /bin/false; \
-- fi;
-- $(Q)if [ ! -d include2 ]; then mkdir -p include2; fi;
-- $(Q)ln -fsn $(srctree)/include/asm-$(ARCH) include2/asm
--endif
--
--# prepare2 creates a makefile if using a separate output directory
--prepare2: prepare3 outputmakefile
--
--prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
-- include/asm include/config/auto.conf
--ifneq ($(KBUILD_MODULES),)
-- $(Q)mkdir -p $(MODVERDIR)
-- $(Q)rm -f $(MODVERDIR)/*
--endif
--
--archprepare: prepare1 scripts_basic
--
--prepare0: archprepare FORCE
-- $(Q)$(MAKE) $(build)=.
-- $(Q)$(MAKE) $(build)=. missing-syscalls
--
--# All the preparing..
--prepare: prepare0
--
--# Leave this as default for preprocessing vmlinux.lds.S, which is now
--# done in arch/$(ARCH)/kernel/Makefile
--
--export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
--
--# FIXME: The asm symlink changes when $(ARCH) changes. That's
--# hard to detect, but I suppose "make mrproper" is a good idea
--# before switching between archs anyway.
--
--include/asm:
-- @echo ' SYMLINK $@ -> include/asm-$(ARCH)'
-- $(Q)if [ ! -d include ]; then mkdir -p include; fi;
-- @ln -fsn asm-$(ARCH) $@
--
--# Generate some files
--# ---------------------------------------------------------------------------
--
--# KERNELRELEASE can change from a few different places, meaning version.h
--# needs to be updated, so this check is forced on all builds
--
--uts_len := 64
--define filechk_utsrelease.h
-- if [ `echo -n "$(KERNELRELEASE)" | wc -c ` -gt $(uts_len) ]; then \
-- echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2; \
-- exit 1; \
-- fi; \
-- (echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";)
--endef
--
--define filechk_version.h
-- (echo \#define LINUX_VERSION_CODE $(shell \
-- expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \
-- echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';)
--endef
--
--include/linux/version.h: $(srctree)/Makefile FORCE
-- $(call filechk,version.h)
--
--include/linux/utsrelease.h: include/config/kernel.release FORCE
-- $(call filechk,utsrelease.h)
--
--# ---------------------------------------------------------------------------
--
--PHONY += depend dep
--depend dep:
-- @echo '*** Warning: make $@ is unnecessary now.'
--
--# ---------------------------------------------------------------------------
--# Kernel headers
--INSTALL_HDR_PATH=$(objtree)/usr
--export INSTALL_HDR_PATH
--
--HDRARCHES=$(filter-out generic,$(patsubst $(srctree)/include/asm-%/Kbuild,%,$(wildcard $(srctree)/include/asm-*/Kbuild)))
--
--PHONY += headers_install_all
--headers_install_all: include/linux/version.h scripts_basic FORCE
-- $(Q)$(MAKE) $(build)=scripts scripts/unifdef
-- $(Q)for arch in $(HDRARCHES); do \
-- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch ;\
-- done
--
--PHONY += headers_install
--headers_install: include/linux/version.h scripts_basic FORCE
-- @if [ ! -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \
-- echo '*** Error: Headers not exportable for this architecture ($(ARCH))'; \
-- exit 1 ; fi
-- $(Q)$(MAKE) $(build)=scripts scripts/unifdef
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include
--
--PHONY += headers_check_all
--headers_check_all: headers_install_all
-- $(Q)for arch in $(HDRARCHES); do \
-- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch HDRCHECK=1 ;\
-- done
--
--PHONY += headers_check
--headers_check: headers_install
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include HDRCHECK=1
--
--# ---------------------------------------------------------------------------
--# Modules
--
--ifdef CONFIG_MODULES
--
--# By default, build modules as well
--
--all: modules
--
--# Build modules
--
--PHONY += modules
--modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux)
-- @echo ' Building modules, stage 2.';
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
--
--
--# Target to prepare building external modules
--PHONY += modules_prepare
--modules_prepare: prepare scripts
--
--# Target to install modules
--PHONY += modules_install
--modules_install: _modinst_ _modinst_post
--
--PHONY += _modinst_
--_modinst_:
-- @if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \
-- echo "Warning: you may need to install module-init-tools"; \
-- echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt";\
-- sleep 1; \
-- fi
-- @rm -rf $(MODLIB)/kernel
-- @rm -f $(MODLIB)/source
-- @mkdir -p $(MODLIB)/kernel
-- @ln -s $(srctree) $(MODLIB)/source
-- @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \
-- rm -f $(MODLIB)/build ; \
-- ln -s $(objtree) $(MODLIB)/build ; \
-- fi
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
--
--# If System.map exists, run depmod. This deliberately does not have a
--# dependency on System.map since that would run the dependency tree on
--# vmlinux. This depmod is only for convenience to give the initial
--# boot a modules.dep even before / is mounted read-write. However the
--# boot script depmod is the master version.
--ifeq "$(strip $(INSTALL_MOD_PATH))" ""
--depmod_opts :=
--else
--depmod_opts := -b $(INSTALL_MOD_PATH) -r
--endif
--PHONY += _modinst_post
--_modinst_post: _modinst_
-- if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
--
--else # CONFIG_MODULES
--
--# Modules not configured
--# ---------------------------------------------------------------------------
--
--modules modules_install: FORCE
-- @echo
-- @echo "The present kernel configuration has modules disabled."
-- @echo "Type 'make config' and enable loadable module support."
-- @echo "Then build a kernel with module support enabled."
-- @echo
-- @exit 1
--
--endif # CONFIG_MODULES
--
--###
--# Cleaning is done on three levels.
--# make clean Delete most generated files
--# Leave enough to build external modules
--# make mrproper Delete the current configuration, and all generated files
--# make distclean Remove editor backup files, patch leftover files and the like
--
--# Directories & files removed with 'make clean'
--CLEAN_DIRS += $(MODVERDIR)
--CLEAN_FILES += vmlinux System.map \
-- .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map
--
--# Directories & files removed with 'make mrproper'
--MRPROPER_DIRS += include/config include2 usr/include
--MRPROPER_FILES += .config .config.old include/asm .version .old_version \
-- include/linux/autoconf.h include/linux/version.h \
-- include/linux/utsrelease.h \
-- Module.symvers tags TAGS cscope*
--
--# clean - Delete most, but leave enough to build external modules
--#
--clean: rm-dirs := $(CLEAN_DIRS)
--clean: rm-files := $(CLEAN_FILES)
--clean-dirs := $(addprefix _clean_,$(srctree) $(vmlinux-alldirs))
--
--PHONY += $(clean-dirs) clean archclean
--$(clean-dirs):
-- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
--
--clean: archclean $(clean-dirs)
-- $(call cmd,rmdirs)
-- $(call cmd,rmfiles)
-- @find . $(RCS_FIND_IGNORE) \
-- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
-- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
-- -o -name '*.symtypes' \) \
-- -type f -print | xargs rm -f
--
--# mrproper - Delete all generated files, including .config
--#
--mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS))
--mrproper: rm-files := $(wildcard $(MRPROPER_FILES))
--mrproper-dirs := $(addprefix _mrproper_,Documentation/DocBook scripts)
--
--PHONY += $(mrproper-dirs) mrproper archmrproper
--$(mrproper-dirs):
-- $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)
--
--mrproper: clean archmrproper $(mrproper-dirs)
-- $(call cmd,rmdirs)
-- $(call cmd,rmfiles)
--
--# distclean
--#
--PHONY += distclean
--
--distclean: mrproper
-- @find $(srctree) $(RCS_FIND_IGNORE) \
-- \( -name '*.orig' -o -name '*.rej' -o -name '*~' \
-- -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
-- -o -name '.*.rej' -o -size 0 \
-- -o -name '*%' -o -name '.*.cmd' -o -name 'core' \) \
-- -type f -print | xargs rm -f
--
--
--# Packaging of the kernel to various formats
--# ---------------------------------------------------------------------------
--# rpm target kept for backward compatibility
--package-dir := $(srctree)/scripts/package
--
--%pkg: include/config/kernel.release FORCE
-- $(Q)$(MAKE) $(build)=$(package-dir) $@
--rpm: include/config/kernel.release FORCE
-- $(Q)$(MAKE) $(build)=$(package-dir) $@
--
--
--# Brief documentation of the typical targets used
--# ---------------------------------------------------------------------------
--
--boards := $(wildcard $(srctree)/arch/$(ARCH)/configs/*_defconfig)
--boards := $(notdir $(boards))
--
--help:
-- @echo 'Cleaning targets:'
-- @echo ' clean - Remove most generated files but keep the config and'
-- @echo ' enough build support to build external modules'
-- @echo ' mrproper - Remove all generated files + config + various backup files'
-- @echo ' distclean - mrproper + remove editor backup and patch files'
-- @echo ''
-- @echo 'Configuration targets:'
-- @$(MAKE) -f $(srctree)/scripts/kconfig/Makefile help
-- @echo ''
-- @echo 'Other generic targets:'
-- @echo ' all - Build all targets marked with [*]'
-- @echo '* vmlinux - Build the bare kernel'
-- @echo '* modules - Build all modules'
-- @echo ' modules_install - Install all modules to INSTALL_MOD_PATH (default: /)'
-- @echo ' dir/ - Build all files in dir and below'
-- @echo ' dir/file.[ois] - Build specified target only'
-- @echo ' dir/file.ko - Build module including final link'
-- @echo ' rpm - Build a kernel as an RPM package'
-- @echo ' tags/TAGS - Generate tags file for editors'
-- @echo ' cscope - Generate cscope index'
-- @echo ' kernelrelease - Output the release version string'
-- @echo ' kernelversion - Output the version stored in Makefile'
-- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \
-- echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \
-- echo ' (default: $(INSTALL_HDR_PATH))'; \
-- fi
-- @echo ''
-- @echo 'Static analysers'
-- @echo ' checkstack - Generate a list of stack hogs'
-- @echo ' namespacecheck - Name space analysis on compiled kernel'
-- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \
-- echo ' headers_check - Sanity check on exported headers'; \
-- fi
-- @echo ''
-- @echo 'Kernel packaging:'
-- @$(MAKE) $(build)=$(package-dir) help
-- @echo ''
-- @echo 'Documentation targets:'
-- @$(MAKE) -f $(srctree)/Documentation/DocBook/Makefile dochelp
-- @echo ''
-- @echo 'Architecture specific targets ($(ARCH)):'
-- @$(if $(archhelp),$(archhelp),\
-- echo ' No architecture specific help defined for $(ARCH)')
-- @echo ''
-- @$(if $(boards), \
-- $(foreach b, $(boards), \
-- printf " %-24s - Build for %s\\n" $(b) $(subst _defconfig,,$(b));) \
-- echo '')
--
-- @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
-- @echo ' make V=2 [targets] 2 => give reason for rebuild of target'
-- @echo ' make O=dir [targets] Locate all output files in "dir", including .config'
-- @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
-- @echo ' make C=2 [targets] Force check of all c source with $$CHECK'
-- @echo ''
-- @echo 'Execute "make" or "make all" to build all targets marked with [*] '
-- @echo 'For further info see the ./README file'
--
--
--# Documentation targets
--# ---------------------------------------------------------------------------
--%docs: scripts_basic FORCE
-- $(Q)$(MAKE) $(build)=Documentation/DocBook $@
--
--else # KBUILD_EXTMOD
--
--###
--# External module support.
--# When building external modules the kernel used as basis is considered
--# read-only, and no consistency checks are made and the make
--# system is not used on the basis kernel. If updates are required
--# in the basis kernel ordinary make commands (without M=...) must
--# be used.
--#
--# The following are the only valid targets when building external
--# modules.
--# make M=dir clean Delete all automatically generated files
--# make M=dir modules Make all modules in specified dir
--# make M=dir Same as 'make M=dir modules'
--# make M=dir modules_install
--# Install the modules built in the module directory
--# Assumes install directory is already created
--
--# We are always building modules
--KBUILD_MODULES := 1
--PHONY += crmodverdir
--crmodverdir:
-- $(Q)mkdir -p $(MODVERDIR)
-- $(Q)rm -f $(MODVERDIR)/*
--
--PHONY += $(objtree)/Module.symvers
--$(objtree)/Module.symvers:
-- @test -e $(objtree)/Module.symvers || ( \
-- echo; \
-- echo " WARNING: Symbol version dump $(objtree)/Module.symvers"; \
-- echo " is missing; modules will have no dependencies and modversions."; \
-- echo )
--
--module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
--PHONY += $(module-dirs) modules
--$(module-dirs): crmodverdir $(objtree)/Module.symvers
-- $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
--
--modules: $(module-dirs)
-- @echo ' Building modules, stage 2.';
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
--
--PHONY += modules_install
--modules_install: _emodinst_ _emodinst_post
--
--install-dir := $(if $(INSTALL_MOD_DIR),$(INSTALL_MOD_DIR),extra)
--PHONY += _emodinst_
--_emodinst_:
-- $(Q)mkdir -p $(MODLIB)/$(install-dir)
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
--
--# Run depmod only is we have System.map and depmod is executable
--quiet_cmd_depmod = DEPMOD $(KERNELRELEASE)
-- cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \
-- $(DEPMOD) -ae -F System.map \
-- $(if $(strip $(INSTALL_MOD_PATH)), \
-- -b $(INSTALL_MOD_PATH) -r) \
-- $(KERNELRELEASE); \
-- fi
--
--PHONY += _emodinst_post
--_emodinst_post: _emodinst_
-- $(call cmd,depmod)
--
--clean-dirs := $(addprefix _clean_,$(KBUILD_EXTMOD))
--
--PHONY += $(clean-dirs) clean
--$(clean-dirs):
-- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
--
--clean: rm-dirs := $(MODVERDIR)
--clean: $(clean-dirs)
-- $(call cmd,rmdirs)
-- @find $(KBUILD_EXTMOD) $(RCS_FIND_IGNORE) \
-- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
-- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \) \
-- -type f -print | xargs rm -f
--
--help:
-- @echo ' Building external modules.'
-- @echo ' Syntax: make -C path/to/kernel/src M=$$PWD target'
-- @echo ''
-- @echo ' modules - default target, build the module(s)'
-- @echo ' modules_install - install the module'
-- @echo ' clean - remove generated files in module directory only'
-- @echo ''
--
--# Dummies...
--PHONY += prepare scripts
--prepare: ;
--scripts: ;
--endif # KBUILD_EXTMOD
--
--# Generate tags for editors
--# ---------------------------------------------------------------------------
--
--#We want __srctree to totally vanish out when KBUILD_OUTPUT is not set
--#(which is the most common case IMHO) to avoid unneeded clutter in the big tags file.
--#Adding $(srctree) adds about 20M on i386 to the size of the output file!
--
--ifeq ($(src),$(obj))
--__srctree =
--else
--__srctree = $(srctree)/
--endif
--
--ifeq ($(ALLSOURCE_ARCHS),)
--ifeq ($(ARCH),um)
--ALLINCLUDE_ARCHS := $(ARCH) $(SUBARCH)
--else
--ALLINCLUDE_ARCHS := $(ARCH)
--endif
--else
--#Allow user to specify only ALLSOURCE_PATHS on the command line, keeping existing behavour.
--ALLINCLUDE_ARCHS := $(ALLSOURCE_ARCHS)
--endif
--
--ALLSOURCE_ARCHS := $(ARCH)
--
--define find-sources
-- ( for ARCH in $(ALLSOURCE_ARCHS) ; do \
-- find $(__srctree)arch/$${ARCH} $(RCS_FIND_IGNORE) \
-- -name $1 -print; \
-- done ; \
-- find $(__srctree)security/selinux/include $(RCS_FIND_IGNORE) \
-- -name $1 -print; \
-- find $(__srctree)include $(RCS_FIND_IGNORE) \
-- \( -name config -o -name 'asm-*' \) -prune \
-- -o -name $1 -print; \
-- for ARCH in $(ALLINCLUDE_ARCHS) ; do \
-- find $(__srctree)include/asm-$${ARCH} $(RCS_FIND_IGNORE) \
-- -name $1 -print; \
-- done ; \
-- find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \
-- -name $1 -print; \
-- find $(__srctree) $(RCS_FIND_IGNORE) \
-- \( -name include -o -name arch \) -prune -o \
-- -name $1 -print; \
-- )
--endef
--
--define all-sources
-- $(call find-sources,'*.[chS]')
--endef
--define all-kconfigs
-- $(call find-sources,'Kconfig*')
--endef
--define all-defconfigs
-- $(call find-sources,'defconfig')
--endef
--
--define xtags
-- if $1 --version 2>&1 | grep -iq exuberant; then \
-- $(all-sources) | xargs $1 -a \
-- -I __initdata,__exitdata,__acquires,__releases \
-- -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \
-- --extra=+f --c-kinds=+px \
-- --regex-asm='/ENTRY\(([^)]*)\).*/\1/'; \
-- $(all-kconfigs) | xargs $1 -a \
-- --langdef=kconfig \
-- --language-force=kconfig \
-- --regex-kconfig='/^[[:blank:]]*config[[:blank:]]+([[:alnum:]_]+)/\1/'; \
-- $(all-defconfigs) | xargs -r $1 -a \
-- --langdef=dotconfig \
-- --language-force=dotconfig \
-- --regex-dotconfig='/^#?[[:blank:]]*(CONFIG_[[:alnum:]_]+)/\1/'; \
-- elif $1 --version 2>&1 | grep -iq emacs; then \
-- $(all-sources) | xargs $1 -a; \
-- $(all-kconfigs) | xargs $1 -a \
-- --regex='/^[ \t]*config[ \t]+\([a-zA-Z0-9_]+\)/\1/'; \
-- $(all-defconfigs) | xargs -r $1 -a \
-- --regex='/^#?[ \t]?\(CONFIG_[a-zA-Z0-9_]+\)/\1/'; \
-- else \
-- $(all-sources) | xargs $1 -a; \
-- fi
--endef
--
--quiet_cmd_cscope-file = FILELST cscope.files
-- cmd_cscope-file = (echo \-k; echo \-q; $(all-sources)) > cscope.files
--
--quiet_cmd_cscope = MAKE cscope.out
-- cmd_cscope = cscope -b
--
--cscope: FORCE
-- $(call cmd,cscope-file)
-- $(call cmd,cscope)
--
--quiet_cmd_TAGS = MAKE $@
--define cmd_TAGS
-- rm -f $@; \
-- $(call xtags,etags)
--endef
--
--TAGS: FORCE
-- $(call cmd,TAGS)
--
--quiet_cmd_tags = MAKE $@
--define cmd_tags
-- rm -f $@; \
-- $(call xtags,ctags)
--endef
--
--tags: FORCE
-- $(call cmd,tags)
--
--
--# Scripts to check various things for consistency
--# ---------------------------------------------------------------------------
--
--includecheck:
-- find * $(RCS_FIND_IGNORE) \
-- -name '*.[hcS]' -type f -print | sort \
-- | xargs $(PERL) -w scripts/checkincludes.pl
--
--versioncheck:
-- find * $(RCS_FIND_IGNORE) \
-- -name '*.[hcS]' -type f -print | sort \
-- | xargs $(PERL) -w scripts/checkversion.pl
--
--namespacecheck:
-- $(PERL) $(srctree)/scripts/namespace.pl
--
--endif #ifeq ($(config-targets),1)
--endif #ifeq ($(mixed-targets),1)
--
--PHONY += checkstack kernelrelease kernelversion
--
--# UML needs a little special treatment here. It wants to use the host
--# toolchain, so needs $(SUBARCH) passed to checkstack.pl. Everyone
--# else wants $(ARCH), including people doing cross-builds, which means
--# that $(SUBARCH) doesn't work here.
--ifeq ($(ARCH), um)
--CHECKSTACK_ARCH := $(SUBARCH)
--else
--CHECKSTACK_ARCH := $(ARCH)
--endif
--checkstack:
-- $(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \
-- $(PERL) $(src)/scripts/checkstack.pl $(CHECKSTACK_ARCH)
--
--kernelrelease:
-- $(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \
-- $(error kernelrelease not valid - run 'make prepare' to update it))
--kernelversion:
-- @echo $(KERNELVERSION)
--
--# Single targets
--# ---------------------------------------------------------------------------
--# Single targets are compatible with:
--# - build whith mixed source and output
--# - build with separate output dir 'make O=...'
--# - external modules
--#
--# target-dir => where to store outputfile
--# build-dir => directory in kernel source tree to use
--
--ifeq ($(KBUILD_EXTMOD),)
-- build-dir = $(patsubst %/,%,$(dir $@))
-- target-dir = $(dir $@)
--else
-- zap-slash=$(filter-out .,$(patsubst %/,%,$(dir $@)))
-- build-dir = $(KBUILD_EXTMOD)$(if $(zap-slash),/$(zap-slash))
-- target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@))
--endif
--
--%.s: %.c prepare scripts FORCE
-- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
--%.i: %.c prepare scripts FORCE
-- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
--%.o: %.c prepare scripts FORCE
-- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
--%.lst: %.c prepare scripts FORCE
-- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
--%.s: %.S prepare scripts FORCE
-- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
--%.o: %.S prepare scripts FORCE
-- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
--%.symtypes: %.c prepare scripts FORCE
-- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
--
--# Modules
--/ %/: prepare scripts FORCE
-- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
-- $(build)=$(build-dir)
--%.ko: prepare scripts FORCE
-- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
-- $(build)=$(build-dir) $(@:.ko=.o)
-- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
--
--# FIXME Should go into a make.lib or something
--# ===========================================================================
--
--quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs)))
-- cmd_rmdirs = rm -rf $(rm-dirs)
--
--quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)))
-- cmd_rmfiles = rm -f $(rm-files)
--
--
--a_flags = -Wp,-MD,$(depfile) $(AFLAGS) $(AFLAGS_KERNEL) \
-- $(NOSTDINC_FLAGS) $(CPPFLAGS) \
-- $(modkern_aflags) $(EXTRA_AFLAGS) $(AFLAGS_$(basetarget).o)
--
--quiet_cmd_as_o_S = AS $@
--cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
--
--# read all saved command lines
--
--targets := $(wildcard $(sort $(targets)))
--cmd_files := $(wildcard .*.cmd $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
--
--ifneq ($(cmd_files),)
-- $(cmd_files): ; # Do not try to update included dependency files
-- include $(cmd_files)
--endif
--
--# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir
--# Usage:
--# $(Q)$(MAKE) $(clean)=dir
--clean := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.clean obj
--
--endif # skip-makefile
--
--PHONY += FORCE
--FORCE:
--
--# Cancel implicit rules on top Makefile, `-rR' will apply to sub-makes.
--Makefile: ;
--
--# Declare the contents of the .PHONY variable as phony. We keep that
--# information in a variable se we can use it in if_changed and friends.
--.PHONY: $(PHONY)
diff -Nurb linux-2.6.22-570/arch/arm/Kconfig linux-2.6.22-590/arch/arm/Kconfig
--- linux-2.6.22-570/arch/arm/Kconfig 2008-03-20 13:25:43.000000000 -0400
+++ linux-2.6.22-590/arch/arm/Kconfig 2008-03-20 13:27:59.000000000 -0400
/*
* Internals. Dont't use..
-diff -Nurb linux-2.6.22-570/include/linux/vserver/network.h.orig.orig linux-2.6.22-590/include/linux/vserver/network.h.orig.orig
---- linux-2.6.22-570/include/linux/vserver/network.h.orig.orig 2008-03-20 13:25:49.000000000 -0400
-+++ linux-2.6.22-590/include/linux/vserver/network.h.orig.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,143 +0,0 @@
--#ifndef _VX_NETWORK_H
--#define _VX_NETWORK_H
--
--#include <linux/types.h>
--
--
--#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
--
--
--/* network flags */
--
--#define NXF_INFO_PRIVATE 0x00000008
--
--#define NXF_SINGLE_IP 0x00000100
--#define NXF_LBACK_REMAP 0x00000200
--
--#define NXF_HIDE_NETIF 0x02000000
--#define NXF_HIDE_LBACK 0x04000000
--
--#define NXF_STATE_SETUP (1ULL << 32)
--#define NXF_STATE_ADMIN (1ULL << 34)
--
--#define NXF_SC_HELPER (1ULL << 36)
--#define NXF_PERSISTENT (1ULL << 38)
--
--#define NXF_ONE_TIME (0x0005ULL << 32)
--
--
--#define NXF_INIT_SET (__nxf_init_set())
--
--static inline uint64_t __nxf_init_set(void) {
-- return NXF_STATE_ADMIN
--#ifdef CONFIG_VSERVER_AUTO_LBACK
-- | NXF_LBACK_REMAP
-- | NXF_HIDE_LBACK
--#endif
--#ifdef CONFIG_VSERVER_AUTO_SINGLE
-- | NXF_SINGLE_IP
--#endif
-- | NXF_HIDE_NETIF;
--}
--
--
--/* network caps */
--
--#define NXC_RAW_ICMP 0x00000100
--
--
--/* address types */
--
--#define NXA_TYPE_IPV4 0x0001
--#define NXA_TYPE_IPV6 0x0002
--
--#define NXA_TYPE_NONE 0x0000
--#define NXA_TYPE_ANY 0x00FF
--
--#define NXA_TYPE_ADDR 0x0010
--#define NXA_TYPE_MASK 0x0020
--#define NXA_TYPE_RANGE 0x0040
--
--#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
--
--#define NXA_MOD_BCAST 0x0100
--#define NXA_MOD_LBACK 0x0200
--
--#define NXA_LOOPBACK 0x1000
--
--#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
--#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK)
--
--#ifdef __KERNEL__
--
--#include <linux/list.h>
--#include <linux/spinlock.h>
--#include <linux/rcupdate.h>
--#include <linux/in.h>
--#include <linux/in6.h>
--#include <asm/atomic.h>
--
--struct nx_addr_v4 {
-- struct nx_addr_v4 *next;
-- struct in_addr ip[2];
-- struct in_addr mask;
-- uint16_t type;
-- uint16_t flags;
--};
--
--struct nx_addr_v6 {
-- struct nx_addr_v6 *next;
-- struct in6_addr ip;
-- struct in6_addr mask;
-- uint32_t prefix;
-- uint16_t type;
-- uint16_t flags;
--};
--
--struct nx_info {
-- struct hlist_node nx_hlist; /* linked list of nxinfos */
-- nid_t nx_id; /* vnet id */
-- atomic_t nx_usecnt; /* usage count */
-- atomic_t nx_tasks; /* tasks count */
-- int nx_state; /* context state */
--
-- uint64_t nx_flags; /* network flag word */
-- uint64_t nx_ncaps; /* network capabilities */
--
-- struct in_addr v4_lback; /* Loopback address */
-- struct in_addr v4_bcast; /* Broadcast address */
-- struct nx_addr_v4 v4; /* First/Single ipv4 address */
--#ifdef CONFIG_IPV6
-- struct nx_addr_v6 v6; /* First/Single ipv6 address */
--#endif
-- char nx_name[65]; /* network context name */
--};
--
--
--/* status flags */
--
--#define NXS_HASHED 0x0001
--#define NXS_SHUTDOWN 0x0100
--#define NXS_RELEASED 0x8000
--
--extern struct nx_info *lookup_nx_info(int);
--
--extern int get_nid_list(int, unsigned int *, int);
--extern int nid_is_hashed(nid_t);
--
--extern int nx_migrate_task(struct task_struct *, struct nx_info *);
--
--extern long vs_net_change(struct nx_info *, unsigned int);
--
--struct sock;
--
--
--#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE)
--#ifdef CONFIG_IPV6
--#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE)
--#else
--#define NX_IPV6(n) (0)
--#endif
--
--#endif /* __KERNEL__ */
--#endif /* _VX_NETWORK_H */
-diff -Nurb linux-2.6.22-570/include/net/addrconf.h linux-2.6.22-590/include/net/addrconf.h
--- linux-2.6.22-570/include/net/addrconf.h 2008-03-20 13:25:45.000000000 -0400
+++ linux-2.6.22-590/include/net/addrconf.h 2008-03-20 13:28:02.000000000 -0400
@@ -61,7 +61,7 @@
+ vma->vm_flags |= VM_CAN_INVALIDATE;
return 0;
}
-diff -Nurb linux-2.6.22-570/mm/shmem.c.orig linux-2.6.22-590/mm/shmem.c.orig
---- linux-2.6.22-570/mm/shmem.c.orig 2008-03-20 13:25:40.000000000 -0400
-+++ linux-2.6.22-590/mm/shmem.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,2619 +0,0 @@
--/*
-- * Resizable virtual memory filesystem for Linux.
-- *
-- * Copyright (C) 2000 Linus Torvalds.
-- * 2000 Transmeta Corp.
-- * 2000-2001 Christoph Rohland
-- * 2000-2001 SAP AG
-- * 2002 Red Hat Inc.
-- * Copyright (C) 2002-2005 Hugh Dickins.
-- * Copyright (C) 2002-2005 VERITAS Software Corporation.
-- * Copyright (C) 2004 Andi Kleen, SuSE Labs
-- *
-- * Extended attribute support for tmpfs:
-- * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
-- * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
-- *
-- * This file is released under the GPL.
-- */
--
--/*
-- * This virtual memory filesystem is heavily based on the ramfs. It
-- * extends ramfs by the ability to use swap and honor resource limits
-- * which makes it a completely usable filesystem.
-- */
--
--#include <linux/module.h>
--#include <linux/init.h>
--#include <linux/fs.h>
--#include <linux/xattr.h>
--#include <linux/generic_acl.h>
--#include <linux/mm.h>
--#include <linux/mman.h>
--#include <linux/file.h>
--#include <linux/swap.h>
--#include <linux/pagemap.h>
--#include <linux/string.h>
--#include <linux/slab.h>
--#include <linux/backing-dev.h>
--#include <linux/shmem_fs.h>
--#include <linux/mount.h>
--#include <linux/writeback.h>
--#include <linux/vfs.h>
--#include <linux/blkdev.h>
--#include <linux/security.h>
--#include <linux/swapops.h>
--#include <linux/mempolicy.h>
--#include <linux/namei.h>
--#include <linux/ctype.h>
--#include <linux/migrate.h>
--#include <linux/highmem.h>
--#include <linux/backing-dev.h>
--
--#include <asm/uaccess.h>
--#include <asm/div64.h>
--#include <asm/pgtable.h>
--
--/* This magic number is used in glibc for posix shared memory */
--#define TMPFS_MAGIC 0x01021994
--
--#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
--#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
--#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
--
--#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
--#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
--
--#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
--
--/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
--#define SHMEM_PAGEIN VM_READ
--#define SHMEM_TRUNCATE VM_WRITE
--
--/* Definition to limit shmem_truncate's steps between cond_rescheds */
--#define LATENCY_LIMIT 64
--
--/* Pretend that each entry is of this size in directory's i_size */
--#define BOGO_DIRENT_SIZE 20
--
--/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
--enum sgp_type {
-- SGP_QUICK, /* don't try more than file page cache lookup */
-- SGP_READ, /* don't exceed i_size, don't allocate page */
-- SGP_CACHE, /* don't exceed i_size, may allocate page */
-- SGP_WRITE, /* may exceed i_size, may allocate page */
--};
--
--static int shmem_getpage(struct inode *inode, unsigned long idx,
-- struct page **pagep, enum sgp_type sgp, int *type);
--
--static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
--{
-- /*
-- * The above definition of ENTRIES_PER_PAGE, and the use of
-- * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
-- * might be reconsidered if it ever diverges from PAGE_SIZE.
-- */
-- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
--}
--
--static inline void shmem_dir_free(struct page *page)
--{
-- __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
--}
--
--static struct page **shmem_dir_map(struct page *page)
--{
-- return (struct page **)kmap_atomic(page, KM_USER0);
--}
--
--static inline void shmem_dir_unmap(struct page **dir)
--{
-- kunmap_atomic(dir, KM_USER0);
--}
--
--static swp_entry_t *shmem_swp_map(struct page *page)
--{
-- return (swp_entry_t *)kmap_atomic(page, KM_USER1);
--}
--
--static inline void shmem_swp_balance_unmap(void)
--{
-- /*
-- * When passing a pointer to an i_direct entry, to code which
-- * also handles indirect entries and so will shmem_swp_unmap,
-- * we must arrange for the preempt count to remain in balance.
-- * What kmap_atomic of a lowmem page does depends on config
-- * and architecture, so pretend to kmap_atomic some lowmem page.
-- */
-- (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
--}
--
--static inline void shmem_swp_unmap(swp_entry_t *entry)
--{
-- kunmap_atomic(entry, KM_USER1);
--}
--
--static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
--{
-- return sb->s_fs_info;
--}
--
--/*
-- * shmem_file_setup pre-accounts the whole fixed size of a VM object,
-- * for shared memory and for shared anonymous (/dev/zero) mappings
-- * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
-- * consistent with the pre-accounting of private mappings ...
-- */
--static inline int shmem_acct_size(unsigned long flags, loff_t size)
--{
-- return (flags & VM_ACCOUNT)?
-- security_vm_enough_memory(VM_ACCT(size)): 0;
--}
--
--static inline void shmem_unacct_size(unsigned long flags, loff_t size)
--{
-- if (flags & VM_ACCOUNT)
-- vm_unacct_memory(VM_ACCT(size));
--}
--
--/*
-- * ... whereas tmpfs objects are accounted incrementally as
-- * pages are allocated, in order to allow huge sparse files.
-- * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
-- * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
-- */
--static inline int shmem_acct_block(unsigned long flags)
--{
-- return (flags & VM_ACCOUNT)?
-- 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
--}
--
--static inline void shmem_unacct_blocks(unsigned long flags, long pages)
--{
-- if (!(flags & VM_ACCOUNT))
-- vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
--}
--
--static const struct super_operations shmem_ops;
--static const struct address_space_operations shmem_aops;
--static const struct file_operations shmem_file_operations;
--static const struct inode_operations shmem_inode_operations;
--static const struct inode_operations shmem_dir_inode_operations;
--static const struct inode_operations shmem_special_inode_operations;
--static struct vm_operations_struct shmem_vm_ops;
--
--static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
-- .ra_pages = 0, /* No readahead */
-- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-- .unplug_io_fn = default_unplug_io_fn,
--};
--
--static LIST_HEAD(shmem_swaplist);
--static DEFINE_SPINLOCK(shmem_swaplist_lock);
--
--static void shmem_free_blocks(struct inode *inode, long pages)
--{
-- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-- if (sbinfo->max_blocks) {
-- spin_lock(&sbinfo->stat_lock);
-- sbinfo->free_blocks += pages;
-- inode->i_blocks -= pages*BLOCKS_PER_PAGE;
-- spin_unlock(&sbinfo->stat_lock);
-- }
--}
--
--/*
-- * shmem_recalc_inode - recalculate the size of an inode
-- *
-- * @inode: inode to recalc
-- *
-- * We have to calculate the free blocks since the mm can drop
-- * undirtied hole pages behind our back.
-- *
-- * But normally info->alloced == inode->i_mapping->nrpages + info->swapped
-- * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
-- *
-- * It has to be called with the spinlock held.
-- */
--static void shmem_recalc_inode(struct inode *inode)
--{
-- struct shmem_inode_info *info = SHMEM_I(inode);
-- long freed;
--
-- freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
-- if (freed > 0) {
-- info->alloced -= freed;
-- shmem_unacct_blocks(info->flags, freed);
-- shmem_free_blocks(inode, freed);
-- }
--}
--
--/*
-- * shmem_swp_entry - find the swap vector position in the info structure
-- *
-- * @info: info structure for the inode
-- * @index: index of the page to find
-- * @page: optional page to add to the structure. Has to be preset to
-- * all zeros
-- *
-- * If there is no space allocated yet it will return NULL when
-- * page is NULL, else it will use the page for the needed block,
-- * setting it to NULL on return to indicate that it has been used.
-- *
-- * The swap vector is organized the following way:
-- *
-- * There are SHMEM_NR_DIRECT entries directly stored in the
-- * shmem_inode_info structure. So small files do not need an addional
-- * allocation.
-- *
-- * For pages with index > SHMEM_NR_DIRECT there is the pointer
-- * i_indirect which points to a page which holds in the first half
-- * doubly indirect blocks, in the second half triple indirect blocks:
-- *
-- * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
-- * following layout (for SHMEM_NR_DIRECT == 16):
-- *
-- * i_indirect -> dir --> 16-19
-- * | +-> 20-23
-- * |
-- * +-->dir2 --> 24-27
-- * | +-> 28-31
-- * | +-> 32-35
-- * | +-> 36-39
-- * |
-- * +-->dir3 --> 40-43
-- * +-> 44-47
-- * +-> 48-51
-- * +-> 52-55
-- */
--static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
--{
-- unsigned long offset;
-- struct page **dir;
-- struct page *subdir;
--
-- if (index < SHMEM_NR_DIRECT) {
-- shmem_swp_balance_unmap();
-- return info->i_direct+index;
-- }
-- if (!info->i_indirect) {
-- if (page) {
-- info->i_indirect = *page;
-- *page = NULL;
-- }
-- return NULL; /* need another page */
-- }
--
-- index -= SHMEM_NR_DIRECT;
-- offset = index % ENTRIES_PER_PAGE;
-- index /= ENTRIES_PER_PAGE;
-- dir = shmem_dir_map(info->i_indirect);
--
-- if (index >= ENTRIES_PER_PAGE/2) {
-- index -= ENTRIES_PER_PAGE/2;
-- dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
-- index %= ENTRIES_PER_PAGE;
-- subdir = *dir;
-- if (!subdir) {
-- if (page) {
-- *dir = *page;
-- *page = NULL;
-- }
-- shmem_dir_unmap(dir);
-- return NULL; /* need another page */
-- }
-- shmem_dir_unmap(dir);
-- dir = shmem_dir_map(subdir);
-- }
--
-- dir += index;
-- subdir = *dir;
-- if (!subdir) {
-- if (!page || !(subdir = *page)) {
-- shmem_dir_unmap(dir);
-- return NULL; /* need a page */
-- }
-- *dir = subdir;
-- *page = NULL;
-- }
-- shmem_dir_unmap(dir);
-- return shmem_swp_map(subdir) + offset;
--}
--
--static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
--{
-- long incdec = value? 1: -1;
--
-- entry->val = value;
-- info->swapped += incdec;
-- if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
-- struct page *page = kmap_atomic_to_page(entry);
-- set_page_private(page, page_private(page) + incdec);
-- }
--}
--
--/*
-- * shmem_swp_alloc - get the position of the swap entry for the page.
-- * If it does not exist allocate the entry.
-- *
-- * @info: info structure for the inode
-- * @index: index of the page to find
-- * @sgp: check and recheck i_size? skip allocation?
-- */
--static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
--{
-- struct inode *inode = &info->vfs_inode;
-- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-- struct page *page = NULL;
-- swp_entry_t *entry;
--
-- if (sgp != SGP_WRITE &&
-- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-- return ERR_PTR(-EINVAL);
--
-- while (!(entry = shmem_swp_entry(info, index, &page))) {
-- if (sgp == SGP_READ)
-- return shmem_swp_map(ZERO_PAGE(0));
-- /*
-- * Test free_blocks against 1 not 0, since we have 1 data
-- * page (and perhaps indirect index pages) yet to allocate:
-- * a waste to allocate index if we cannot allocate data.
-- */
-- if (sbinfo->max_blocks) {
-- spin_lock(&sbinfo->stat_lock);
-- if (sbinfo->free_blocks <= 1) {
-- spin_unlock(&sbinfo->stat_lock);
-- return ERR_PTR(-ENOSPC);
-- }
-- sbinfo->free_blocks--;
-- inode->i_blocks += BLOCKS_PER_PAGE;
-- spin_unlock(&sbinfo->stat_lock);
-- }
--
-- spin_unlock(&info->lock);
-- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
-- if (page)
-- set_page_private(page, 0);
-- spin_lock(&info->lock);
--
-- if (!page) {
-- shmem_free_blocks(inode, 1);
-- return ERR_PTR(-ENOMEM);
-- }
-- if (sgp != SGP_WRITE &&
-- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
-- entry = ERR_PTR(-EINVAL);
-- break;
-- }
-- if (info->next_index <= index)
-- info->next_index = index + 1;
-- }
-- if (page) {
-- /* another task gave its page, or truncated the file */
-- shmem_free_blocks(inode, 1);
-- shmem_dir_free(page);
-- }
-- if (info->next_index <= index && !IS_ERR(entry))
-- info->next_index = index + 1;
-- return entry;
--}
--
--/*
-- * shmem_free_swp - free some swap entries in a directory
-- *
-- * @dir: pointer to the directory
-- * @edir: pointer after last entry of the directory
-- * @punch_lock: pointer to spinlock when needed for the holepunch case
-- */
--static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
-- spinlock_t *punch_lock)
--{
-- spinlock_t *punch_unlock = NULL;
-- swp_entry_t *ptr;
-- int freed = 0;
--
-- for (ptr = dir; ptr < edir; ptr++) {
-- if (ptr->val) {
-- if (unlikely(punch_lock)) {
-- punch_unlock = punch_lock;
-- punch_lock = NULL;
-- spin_lock(punch_unlock);
-- if (!ptr->val)
-- continue;
-- }
-- free_swap_and_cache(*ptr);
-- *ptr = (swp_entry_t){0};
-- freed++;
-- }
-- }
-- if (punch_unlock)
-- spin_unlock(punch_unlock);
-- return freed;
--}
--
--static int shmem_map_and_free_swp(struct page *subdir, int offset,
-- int limit, struct page ***dir, spinlock_t *punch_lock)
--{
-- swp_entry_t *ptr;
-- int freed = 0;
--
-- ptr = shmem_swp_map(subdir);
-- for (; offset < limit; offset += LATENCY_LIMIT) {
-- int size = limit - offset;
-- if (size > LATENCY_LIMIT)
-- size = LATENCY_LIMIT;
-- freed += shmem_free_swp(ptr+offset, ptr+offset+size,
-- punch_lock);
-- if (need_resched()) {
-- shmem_swp_unmap(ptr);
-- if (*dir) {
-- shmem_dir_unmap(*dir);
-- *dir = NULL;
-- }
-- cond_resched();
-- ptr = shmem_swp_map(subdir);
-- }
-- }
-- shmem_swp_unmap(ptr);
-- return freed;
--}
--
--static void shmem_free_pages(struct list_head *next)
--{
-- struct page *page;
-- int freed = 0;
--
-- do {
-- page = container_of(next, struct page, lru);
-- next = next->next;
-- shmem_dir_free(page);
-- freed++;
-- if (freed >= LATENCY_LIMIT) {
-- cond_resched();
-- freed = 0;
-- }
-- } while (next);
--}
--
--static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
--{
-- struct shmem_inode_info *info = SHMEM_I(inode);
-- unsigned long idx;
-- unsigned long size;
-- unsigned long limit;
-- unsigned long stage;
-- unsigned long diroff;
-- struct page **dir;
-- struct page *topdir;
-- struct page *middir;
-- struct page *subdir;
-- swp_entry_t *ptr;
-- LIST_HEAD(pages_to_free);
-- long nr_pages_to_free = 0;
-- long nr_swaps_freed = 0;
-- int offset;
-- int freed;
-- int punch_hole;
-- spinlock_t *needs_lock;
-- spinlock_t *punch_lock;
-- unsigned long upper_limit;
--
-- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-- idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-- if (idx >= info->next_index)
-- return;
--
-- spin_lock(&info->lock);
-- info->flags |= SHMEM_TRUNCATE;
-- if (likely(end == (loff_t) -1)) {
-- limit = info->next_index;
-- upper_limit = SHMEM_MAX_INDEX;
-- info->next_index = idx;
-- needs_lock = NULL;
-- punch_hole = 0;
-- } else {
-- if (end + 1 >= inode->i_size) { /* we may free a little more */
-- limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
-- PAGE_CACHE_SHIFT;
-- upper_limit = SHMEM_MAX_INDEX;
-- } else {
-- limit = (end + 1) >> PAGE_CACHE_SHIFT;
-- upper_limit = limit;
-- }
-- needs_lock = &info->lock;
-- punch_hole = 1;
-- }
--
-- topdir = info->i_indirect;
-- if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
-- info->i_indirect = NULL;
-- nr_pages_to_free++;
-- list_add(&topdir->lru, &pages_to_free);
-- }
-- spin_unlock(&info->lock);
--
-- if (info->swapped && idx < SHMEM_NR_DIRECT) {
-- ptr = info->i_direct;
-- size = limit;
-- if (size > SHMEM_NR_DIRECT)
-- size = SHMEM_NR_DIRECT;
-- nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
-- }
--
-- /*
-- * If there are no indirect blocks or we are punching a hole
-- * below indirect blocks, nothing to be done.
-- */
-- if (!topdir || limit <= SHMEM_NR_DIRECT)
-- goto done2;
--
-- /*
-- * The truncation case has already dropped info->lock, and we're safe
-- * because i_size and next_index have already been lowered, preventing
-- * access beyond. But in the punch_hole case, we still need to take
-- * the lock when updating the swap directory, because there might be
-- * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
-- * shmem_writepage. However, whenever we find we can remove a whole
-- * directory page (not at the misaligned start or end of the range),
-- * we first NULLify its pointer in the level above, and then have no
-- * need to take the lock when updating its contents: needs_lock and
-- * punch_lock (either pointing to info->lock or NULL) manage this.
-- */
--
-- upper_limit -= SHMEM_NR_DIRECT;
-- limit -= SHMEM_NR_DIRECT;
-- idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
-- offset = idx % ENTRIES_PER_PAGE;
-- idx -= offset;
--
-- dir = shmem_dir_map(topdir);
-- stage = ENTRIES_PER_PAGEPAGE/2;
-- if (idx < ENTRIES_PER_PAGEPAGE/2) {
-- middir = topdir;
-- diroff = idx/ENTRIES_PER_PAGE;
-- } else {
-- dir += ENTRIES_PER_PAGE/2;
-- dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
-- while (stage <= idx)
-- stage += ENTRIES_PER_PAGEPAGE;
-- middir = *dir;
-- if (*dir) {
-- diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
-- ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-- if (!diroff && !offset && upper_limit >= stage) {
-- if (needs_lock) {
-- spin_lock(needs_lock);
-- *dir = NULL;
-- spin_unlock(needs_lock);
-- needs_lock = NULL;
-- } else
-- *dir = NULL;
-- nr_pages_to_free++;
-- list_add(&middir->lru, &pages_to_free);
-- }
-- shmem_dir_unmap(dir);
-- dir = shmem_dir_map(middir);
-- } else {
-- diroff = 0;
-- offset = 0;
-- idx = stage;
-- }
-- }
--
-- for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
-- if (unlikely(idx == stage)) {
-- shmem_dir_unmap(dir);
-- dir = shmem_dir_map(topdir) +
-- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
-- while (!*dir) {
-- dir++;
-- idx += ENTRIES_PER_PAGEPAGE;
-- if (idx >= limit)
-- goto done1;
-- }
-- stage = idx + ENTRIES_PER_PAGEPAGE;
-- middir = *dir;
-- if (punch_hole)
-- needs_lock = &info->lock;
-- if (upper_limit >= stage) {
-- if (needs_lock) {
-- spin_lock(needs_lock);
-- *dir = NULL;
-- spin_unlock(needs_lock);
-- needs_lock = NULL;
-- } else
-- *dir = NULL;
-- nr_pages_to_free++;
-- list_add(&middir->lru, &pages_to_free);
-- }
-- shmem_dir_unmap(dir);
-- cond_resched();
-- dir = shmem_dir_map(middir);
-- diroff = 0;
-- }
-- punch_lock = needs_lock;
-- subdir = dir[diroff];
-- if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
-- if (needs_lock) {
-- spin_lock(needs_lock);
-- dir[diroff] = NULL;
-- spin_unlock(needs_lock);
-- punch_lock = NULL;
-- } else
-- dir[diroff] = NULL;
-- nr_pages_to_free++;
-- list_add(&subdir->lru, &pages_to_free);
-- }
-- if (subdir && page_private(subdir) /* has swap entries */) {
-- size = limit - idx;
-- if (size > ENTRIES_PER_PAGE)
-- size = ENTRIES_PER_PAGE;
-- freed = shmem_map_and_free_swp(subdir,
-- offset, size, &dir, punch_lock);
-- if (!dir)
-- dir = shmem_dir_map(middir);
-- nr_swaps_freed += freed;
-- if (offset || punch_lock) {
-- spin_lock(&info->lock);
-- set_page_private(subdir,
-- page_private(subdir) - freed);
-- spin_unlock(&info->lock);
-- } else
-- BUG_ON(page_private(subdir) != freed);
-- }
-- offset = 0;
-- }
--done1:
-- shmem_dir_unmap(dir);
--done2:
-- if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
-- /*
-- * Call truncate_inode_pages again: racing shmem_unuse_inode
-- * may have swizzled a page in from swap since vmtruncate or
-- * generic_delete_inode did it, before we lowered next_index.
-- * Also, though shmem_getpage checks i_size before adding to
-- * cache, no recheck after: so fix the narrow window there too.
-- *
-- * Recalling truncate_inode_pages_range and unmap_mapping_range
-- * every time for punch_hole (which never got a chance to clear
-- * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
-- * yet hardly ever necessary: try to optimize them out later.
-- */
-- truncate_inode_pages_range(inode->i_mapping, start, end);
-- if (punch_hole)
-- unmap_mapping_range(inode->i_mapping, start,
-- end - start, 1);
-- }
--
-- spin_lock(&info->lock);
-- info->flags &= ~SHMEM_TRUNCATE;
-- info->swapped -= nr_swaps_freed;
-- if (nr_pages_to_free)
-- shmem_free_blocks(inode, nr_pages_to_free);
-- shmem_recalc_inode(inode);
-- spin_unlock(&info->lock);
--
-- /*
-- * Empty swap vector directory pages to be freed?
-- */
-- if (!list_empty(&pages_to_free)) {
-- pages_to_free.prev->next = NULL;
-- shmem_free_pages(pages_to_free.next);
-- }
--}
--
--static void shmem_truncate(struct inode *inode)
--{
-- shmem_truncate_range(inode, inode->i_size, (loff_t)-1);
--}
--
--static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
--{
-- struct inode *inode = dentry->d_inode;
-- struct page *page = NULL;
-- int error;
--
-- if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
-- if (attr->ia_size < inode->i_size) {
-- /*
-- * If truncating down to a partial page, then
-- * if that page is already allocated, hold it
-- * in memory until the truncation is over, so
-- * truncate_partial_page cannnot miss it were
-- * it assigned to swap.
-- */
-- if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
-- (void) shmem_getpage(inode,
-- attr->ia_size>>PAGE_CACHE_SHIFT,
-- &page, SGP_READ, NULL);
-- }
-- /*
-- * Reset SHMEM_PAGEIN flag so that shmem_truncate can
-- * detect if any pages might have been added to cache
-- * after truncate_inode_pages. But we needn't bother
-- * if it's being fully truncated to zero-length: the
-- * nrpages check is efficient enough in that case.
-- */
-- if (attr->ia_size) {
-- struct shmem_inode_info *info = SHMEM_I(inode);
-- spin_lock(&info->lock);
-- info->flags &= ~SHMEM_PAGEIN;
-- spin_unlock(&info->lock);
-- }
-- }
-- }
--
-- error = inode_change_ok(inode, attr);
-- if (!error)
-- error = inode_setattr(inode, attr);
--#ifdef CONFIG_TMPFS_POSIX_ACL
-- if (!error && (attr->ia_valid & ATTR_MODE))
-- error = generic_acl_chmod(inode, &shmem_acl_ops);
--#endif
-- if (page)
-- page_cache_release(page);
-- return error;
--}
--
--static void shmem_delete_inode(struct inode *inode)
--{
-- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-- struct shmem_inode_info *info = SHMEM_I(inode);
--
-- if (inode->i_op->truncate == shmem_truncate) {
-- truncate_inode_pages(inode->i_mapping, 0);
-- shmem_unacct_size(info->flags, inode->i_size);
-- inode->i_size = 0;
-- shmem_truncate(inode);
-- if (!list_empty(&info->swaplist)) {
-- spin_lock(&shmem_swaplist_lock);
-- list_del_init(&info->swaplist);
-- spin_unlock(&shmem_swaplist_lock);
-- }
-- }
-- BUG_ON(inode->i_blocks);
-- if (sbinfo->max_inodes) {
-- spin_lock(&sbinfo->stat_lock);
-- sbinfo->free_inodes++;
-- spin_unlock(&sbinfo->stat_lock);
-- }
-- clear_inode(inode);
--}
--
--static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
--{
-- swp_entry_t *ptr;
--
-- for (ptr = dir; ptr < edir; ptr++) {
-- if (ptr->val == entry.val)
-- return ptr - dir;
-- }
-- return -1;
--}
--
--static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
--{
-- struct inode *inode;
-- unsigned long idx;
-- unsigned long size;
-- unsigned long limit;
-- unsigned long stage;
-- struct page **dir;
-- struct page *subdir;
-- swp_entry_t *ptr;
-- int offset;
--
-- idx = 0;
-- ptr = info->i_direct;
-- spin_lock(&info->lock);
-- limit = info->next_index;
-- size = limit;
-- if (size > SHMEM_NR_DIRECT)
-- size = SHMEM_NR_DIRECT;
-- offset = shmem_find_swp(entry, ptr, ptr+size);
-- if (offset >= 0) {
-- shmem_swp_balance_unmap();
-- goto found;
-- }
-- if (!info->i_indirect)
-- goto lost2;
--
-- dir = shmem_dir_map(info->i_indirect);
-- stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
--
-- for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
-- if (unlikely(idx == stage)) {
-- shmem_dir_unmap(dir-1);
-- dir = shmem_dir_map(info->i_indirect) +
-- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
-- while (!*dir) {
-- dir++;
-- idx += ENTRIES_PER_PAGEPAGE;
-- if (idx >= limit)
-- goto lost1;
-- }
-- stage = idx + ENTRIES_PER_PAGEPAGE;
-- subdir = *dir;
-- shmem_dir_unmap(dir);
-- dir = shmem_dir_map(subdir);
-- }
-- subdir = *dir;
-- if (subdir && page_private(subdir)) {
-- ptr = shmem_swp_map(subdir);
-- size = limit - idx;
-- if (size > ENTRIES_PER_PAGE)
-- size = ENTRIES_PER_PAGE;
-- offset = shmem_find_swp(entry, ptr, ptr+size);
-- if (offset >= 0) {
-- shmem_dir_unmap(dir);
-- goto found;
-- }
-- shmem_swp_unmap(ptr);
-- }
-- }
--lost1:
-- shmem_dir_unmap(dir-1);
--lost2:
-- spin_unlock(&info->lock);
-- return 0;
--found:
-- idx += offset;
-- inode = &info->vfs_inode;
-- if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
-- info->flags |= SHMEM_PAGEIN;
-- shmem_swp_set(info, ptr + offset, 0);
-- }
-- shmem_swp_unmap(ptr);
-- spin_unlock(&info->lock);
-- /*
-- * Decrement swap count even when the entry is left behind:
-- * try_to_unuse will skip over mms, then reincrement count.
-- */
-- swap_free(entry);
-- return 1;
--}
--
--/*
-- * shmem_unuse() search for an eventually swapped out shmem page.
-- */
--int shmem_unuse(swp_entry_t entry, struct page *page)
--{
-- struct list_head *p, *next;
-- struct shmem_inode_info *info;
-- int found = 0;
--
-- spin_lock(&shmem_swaplist_lock);
-- list_for_each_safe(p, next, &shmem_swaplist) {
-- info = list_entry(p, struct shmem_inode_info, swaplist);
-- if (!info->swapped)
-- list_del_init(&info->swaplist);
-- else if (shmem_unuse_inode(info, entry, page)) {
-- /* move head to start search for next from here */
-- list_move_tail(&shmem_swaplist, &info->swaplist);
-- found = 1;
-- break;
-- }
-- }
-- spin_unlock(&shmem_swaplist_lock);
-- return found;
--}
--
--/*
-- * Move the page from the page cache to the swap cache.
-- */
--static int shmem_writepage(struct page *page, struct writeback_control *wbc)
--{
-- struct shmem_inode_info *info;
-- swp_entry_t *entry, swap;
-- struct address_space *mapping;
-- unsigned long index;
-- struct inode *inode;
--
-- BUG_ON(!PageLocked(page));
-- /*
-- * shmem_backing_dev_info's capabilities prevent regular writeback or
-- * sync from ever calling shmem_writepage; but a stacking filesystem
-- * may use the ->writepage of its underlying filesystem, in which case
-- * we want to do nothing when that underlying filesystem is tmpfs
-- * (writing out to swap is useful as a response to memory pressure, but
-- * of no use to stabilize the data) - just redirty the page, unlock it
-- * and claim success in this case. AOP_WRITEPAGE_ACTIVATE, and the
-- * page_mapped check below, must be avoided unless we're in reclaim.
-- */
-- if (!wbc->for_reclaim) {
-- set_page_dirty(page);
-- unlock_page(page);
-- return 0;
-- }
-- BUG_ON(page_mapped(page));
--
-- mapping = page->mapping;
-- index = page->index;
-- inode = mapping->host;
-- info = SHMEM_I(inode);
-- if (info->flags & VM_LOCKED)
-- goto redirty;
-- swap = get_swap_page();
-- if (!swap.val)
-- goto redirty;
--
-- spin_lock(&info->lock);
-- shmem_recalc_inode(inode);
-- if (index >= info->next_index) {
-- BUG_ON(!(info->flags & SHMEM_TRUNCATE));
-- goto unlock;
-- }
-- entry = shmem_swp_entry(info, index, NULL);
-- BUG_ON(!entry);
-- BUG_ON(entry->val);
--
-- if (move_to_swap_cache(page, swap) == 0) {
-- shmem_swp_set(info, entry, swap.val);
-- shmem_swp_unmap(entry);
-- spin_unlock(&info->lock);
-- if (list_empty(&info->swaplist)) {
-- spin_lock(&shmem_swaplist_lock);
-- /* move instead of add in case we're racing */
-- list_move_tail(&info->swaplist, &shmem_swaplist);
-- spin_unlock(&shmem_swaplist_lock);
-- }
-- unlock_page(page);
-- return 0;
-- }
--
-- shmem_swp_unmap(entry);
--unlock:
-- spin_unlock(&info->lock);
-- swap_free(swap);
--redirty:
-- set_page_dirty(page);
-- return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */
--}
--
--#ifdef CONFIG_NUMA
--static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
--{
-- char *nodelist = strchr(value, ':');
-- int err = 1;
--
-- if (nodelist) {
-- /* NUL-terminate policy string */
-- *nodelist++ = '\0';
-- if (nodelist_parse(nodelist, *policy_nodes))
-- goto out;
-- if (!nodes_subset(*policy_nodes, node_online_map))
-- goto out;
-- }
-- if (!strcmp(value, "default")) {
-- *policy = MPOL_DEFAULT;
-- /* Don't allow a nodelist */
-- if (!nodelist)
-- err = 0;
-- } else if (!strcmp(value, "prefer")) {
-- *policy = MPOL_PREFERRED;
-- /* Insist on a nodelist of one node only */
-- if (nodelist) {
-- char *rest = nodelist;
-- while (isdigit(*rest))
-- rest++;
-- if (!*rest)
-- err = 0;
-- }
-- } else if (!strcmp(value, "bind")) {
-- *policy = MPOL_BIND;
-- /* Insist on a nodelist */
-- if (nodelist)
-- err = 0;
-- } else if (!strcmp(value, "interleave")) {
-- *policy = MPOL_INTERLEAVE;
-- /* Default to nodes online if no nodelist */
-- if (!nodelist)
-- *policy_nodes = node_online_map;
-- err = 0;
-- }
--out:
-- /* Restore string for error message */
-- if (nodelist)
-- *--nodelist = ':';
-- return err;
--}
--
--static struct page *shmem_swapin_async(struct shared_policy *p,
-- swp_entry_t entry, unsigned long idx)
--{
-- struct page *page;
-- struct vm_area_struct pvma;
--
-- /* Create a pseudo vma that just contains the policy */
-- memset(&pvma, 0, sizeof(struct vm_area_struct));
-- pvma.vm_end = PAGE_SIZE;
-- pvma.vm_pgoff = idx;
-- pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
-- page = read_swap_cache_async(entry, &pvma, 0);
-- mpol_free(pvma.vm_policy);
-- return page;
--}
--
--struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
-- unsigned long idx)
--{
-- struct shared_policy *p = &info->policy;
-- int i, num;
-- struct page *page;
-- unsigned long offset;
--
-- num = valid_swaphandles(entry, &offset);
-- for (i = 0; i < num; offset++, i++) {
-- page = shmem_swapin_async(p,
-- swp_entry(swp_type(entry), offset), idx);
-- if (!page)
-- break;
-- page_cache_release(page);
-- }
-- lru_add_drain(); /* Push any new pages onto the LRU now */
-- return shmem_swapin_async(p, entry, idx);
--}
--
--static struct page *
--shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
-- unsigned long idx)
--{
-- struct vm_area_struct pvma;
-- struct page *page;
--
-- memset(&pvma, 0, sizeof(struct vm_area_struct));
-- pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
-- pvma.vm_pgoff = idx;
-- pvma.vm_end = PAGE_SIZE;
-- page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0);
-- mpol_free(pvma.vm_policy);
-- return page;
--}
--#else
--static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
--{
-- return 1;
--}
--
--static inline struct page *
--shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
--{
-- swapin_readahead(entry, 0, NULL);
-- return read_swap_cache_async(entry, NULL, 0);
--}
--
--static inline struct page *
--shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx)
--{
-- return alloc_page(gfp | __GFP_ZERO);
--}
--#endif
--
--/*
-- * shmem_getpage - either get the page from swap or allocate a new one
-- *
-- * If we allocate a new one we do not mark it dirty. That's up to the
-- * vm. If we swap it in we mark it dirty since we also free the swap
-- * entry since a page cannot live in both the swap and page cache
-- */
--static int shmem_getpage(struct inode *inode, unsigned long idx,
-- struct page **pagep, enum sgp_type sgp, int *type)
--{
-- struct address_space *mapping = inode->i_mapping;
-- struct shmem_inode_info *info = SHMEM_I(inode);
-- struct shmem_sb_info *sbinfo;
-- struct page *filepage = *pagep;
-- struct page *swappage;
-- swp_entry_t *entry;
-- swp_entry_t swap;
-- int error;
--
-- if (idx >= SHMEM_MAX_INDEX)
-- return -EFBIG;
-- /*
-- * Normally, filepage is NULL on entry, and either found
-- * uptodate immediately, or allocated and zeroed, or read
-- * in under swappage, which is then assigned to filepage.
-- * But shmem_prepare_write passes in a locked filepage,
-- * which may be found not uptodate by other callers too,
-- * and may need to be copied from the swappage read in.
-- */
--repeat:
-- if (!filepage)
-- filepage = find_lock_page(mapping, idx);
-- if (filepage && PageUptodate(filepage))
-- goto done;
-- error = 0;
-- if (sgp == SGP_QUICK)
-- goto failed;
--
-- spin_lock(&info->lock);
-- shmem_recalc_inode(inode);
-- entry = shmem_swp_alloc(info, idx, sgp);
-- if (IS_ERR(entry)) {
-- spin_unlock(&info->lock);
-- error = PTR_ERR(entry);
-- goto failed;
-- }
-- swap = *entry;
--
-- if (swap.val) {
-- /* Look it up and read it in.. */
-- swappage = lookup_swap_cache(swap);
-- if (!swappage) {
-- shmem_swp_unmap(entry);
-- /* here we actually do the io */
-- if (type && *type == VM_FAULT_MINOR) {
-- __count_vm_event(PGMAJFAULT);
-- *type = VM_FAULT_MAJOR;
-- }
-- spin_unlock(&info->lock);
-- swappage = shmem_swapin(info, swap, idx);
-- if (!swappage) {
-- spin_lock(&info->lock);
-- entry = shmem_swp_alloc(info, idx, sgp);
-- if (IS_ERR(entry))
-- error = PTR_ERR(entry);
-- else {
-- if (entry->val == swap.val)
-- error = -ENOMEM;
-- shmem_swp_unmap(entry);
-- }
-- spin_unlock(&info->lock);
-- if (error)
-- goto failed;
-- goto repeat;
-- }
-- wait_on_page_locked(swappage);
-- page_cache_release(swappage);
-- goto repeat;
-- }
--
-- /* We have to do this with page locked to prevent races */
-- if (TestSetPageLocked(swappage)) {
-- shmem_swp_unmap(entry);
-- spin_unlock(&info->lock);
-- wait_on_page_locked(swappage);
-- page_cache_release(swappage);
-- goto repeat;
-- }
-- if (PageWriteback(swappage)) {
-- shmem_swp_unmap(entry);
-- spin_unlock(&info->lock);
-- wait_on_page_writeback(swappage);
-- unlock_page(swappage);
-- page_cache_release(swappage);
-- goto repeat;
-- }
-- if (!PageUptodate(swappage)) {
-- shmem_swp_unmap(entry);
-- spin_unlock(&info->lock);
-- unlock_page(swappage);
-- page_cache_release(swappage);
-- error = -EIO;
-- goto failed;
-- }
--
-- if (filepage) {
-- shmem_swp_set(info, entry, 0);
-- shmem_swp_unmap(entry);
-- delete_from_swap_cache(swappage);
-- spin_unlock(&info->lock);
-- copy_highpage(filepage, swappage);
-- unlock_page(swappage);
-- page_cache_release(swappage);
-- flush_dcache_page(filepage);
-- SetPageUptodate(filepage);
-- set_page_dirty(filepage);
-- swap_free(swap);
-- } else if (!(error = move_from_swap_cache(
-- swappage, idx, mapping))) {
-- info->flags |= SHMEM_PAGEIN;
-- shmem_swp_set(info, entry, 0);
-- shmem_swp_unmap(entry);
-- spin_unlock(&info->lock);
-- filepage = swappage;
-- swap_free(swap);
-- } else {
-- shmem_swp_unmap(entry);
-- spin_unlock(&info->lock);
-- unlock_page(swappage);
-- page_cache_release(swappage);
-- if (error == -ENOMEM) {
-- /* let kswapd refresh zone for GFP_ATOMICs */
-- congestion_wait(WRITE, HZ/50);
-- }
-- goto repeat;
-- }
-- } else if (sgp == SGP_READ && !filepage) {
-- shmem_swp_unmap(entry);
-- filepage = find_get_page(mapping, idx);
-- if (filepage &&
-- (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
-- spin_unlock(&info->lock);
-- wait_on_page_locked(filepage);
-- page_cache_release(filepage);
-- filepage = NULL;
-- goto repeat;
-- }
-- spin_unlock(&info->lock);
-- } else {
-- shmem_swp_unmap(entry);
-- sbinfo = SHMEM_SB(inode->i_sb);
-- if (sbinfo->max_blocks) {
-- spin_lock(&sbinfo->stat_lock);
-- if (sbinfo->free_blocks == 0 ||
-- shmem_acct_block(info->flags)) {
-- spin_unlock(&sbinfo->stat_lock);
-- spin_unlock(&info->lock);
-- error = -ENOSPC;
-- goto failed;
-- }
-- sbinfo->free_blocks--;
-- inode->i_blocks += BLOCKS_PER_PAGE;
-- spin_unlock(&sbinfo->stat_lock);
-- } else if (shmem_acct_block(info->flags)) {
-- spin_unlock(&info->lock);
-- error = -ENOSPC;
-- goto failed;
-- }
--
-- if (!filepage) {
-- spin_unlock(&info->lock);
-- filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
-- info,
-- idx);
-- if (!filepage) {
-- shmem_unacct_blocks(info->flags, 1);
-- shmem_free_blocks(inode, 1);
-- error = -ENOMEM;
-- goto failed;
-- }
--
-- spin_lock(&info->lock);
-- entry = shmem_swp_alloc(info, idx, sgp);
-- if (IS_ERR(entry))
-- error = PTR_ERR(entry);
-- else {
-- swap = *entry;
-- shmem_swp_unmap(entry);
-- }
-- if (error || swap.val || 0 != add_to_page_cache_lru(
-- filepage, mapping, idx, GFP_ATOMIC)) {
-- spin_unlock(&info->lock);
-- page_cache_release(filepage);
-- shmem_unacct_blocks(info->flags, 1);
-- shmem_free_blocks(inode, 1);
-- filepage = NULL;
-- if (error)
-- goto failed;
-- goto repeat;
-- }
-- info->flags |= SHMEM_PAGEIN;
-- }
--
-- info->alloced++;
-- spin_unlock(&info->lock);
-- flush_dcache_page(filepage);
-- SetPageUptodate(filepage);
-- }
--done:
-- if (*pagep != filepage) {
-- unlock_page(filepage);
-- *pagep = filepage;
-- }
-- return 0;
--
--failed:
-- if (*pagep != filepage) {
-- unlock_page(filepage);
-- page_cache_release(filepage);
-- }
-- return error;
--}
--
--static struct page *shmem_nopage(struct vm_area_struct *vma,
-- unsigned long address, int *type)
--{
-- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-- struct page *page = NULL;
-- unsigned long idx;
-- int error;
--
-- idx = (address - vma->vm_start) >> PAGE_SHIFT;
-- idx += vma->vm_pgoff;
-- idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-- if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-- return NOPAGE_SIGBUS;
--
-- error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
-- if (error)
-- return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
--
-- mark_page_accessed(page);
-- return page;
--}
--
--static int shmem_populate(struct vm_area_struct *vma,
-- unsigned long addr, unsigned long len,
-- pgprot_t prot, unsigned long pgoff, int nonblock)
--{
-- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-- struct mm_struct *mm = vma->vm_mm;
-- enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
-- unsigned long size;
--
-- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-- if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
-- return -EINVAL;
--
-- while ((long) len > 0) {
-- struct page *page = NULL;
-- int err;
-- /*
-- * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
-- */
-- err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
-- if (err)
-- return err;
-- /* Page may still be null, but only if nonblock was set. */
-- if (page) {
-- mark_page_accessed(page);
-- err = install_page(mm, vma, addr, page, prot);
-- if (err) {
-- page_cache_release(page);
-- return err;
-- }
-- } else if (vma->vm_flags & VM_NONLINEAR) {
-- /* No page was found just because we can't read it in
-- * now (being here implies nonblock != 0), but the page
-- * may exist, so set the PTE to fault it in later. */
-- err = install_file_pte(mm, vma, addr, pgoff, prot);
-- if (err)
-- return err;
-- }
--
-- len -= PAGE_SIZE;
-- addr += PAGE_SIZE;
-- pgoff++;
-- }
-- return 0;
--}
--
--#ifdef CONFIG_NUMA
--int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
--{
-- struct inode *i = vma->vm_file->f_path.dentry->d_inode;
-- return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
--}
--
--struct mempolicy *
--shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
--{
-- struct inode *i = vma->vm_file->f_path.dentry->d_inode;
-- unsigned long idx;
--
-- idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-- return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
--}
--#endif
--
--int shmem_lock(struct file *file, int lock, struct user_struct *user)
--{
-- struct inode *inode = file->f_path.dentry->d_inode;
-- struct shmem_inode_info *info = SHMEM_I(inode);
-- int retval = -ENOMEM;
--
-- spin_lock(&info->lock);
-- if (lock && !(info->flags & VM_LOCKED)) {
-- if (!user_shm_lock(inode->i_size, user))
-- goto out_nomem;
-- info->flags |= VM_LOCKED;
-- }
-- if (!lock && (info->flags & VM_LOCKED) && user) {
-- user_shm_unlock(inode->i_size, user);
-- info->flags &= ~VM_LOCKED;
-- }
-- retval = 0;
--out_nomem:
-- spin_unlock(&info->lock);
-- return retval;
--}
--
--static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
--{
-- file_accessed(file);
-- vma->vm_ops = &shmem_vm_ops;
-- return 0;
--}
--
--static struct inode *
--shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
--{
-- struct inode *inode;
-- struct shmem_inode_info *info;
-- struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
--
-- if (sbinfo->max_inodes) {
-- spin_lock(&sbinfo->stat_lock);
-- if (!sbinfo->free_inodes) {
-- spin_unlock(&sbinfo->stat_lock);
-- return NULL;
-- }
-- sbinfo->free_inodes--;
-- spin_unlock(&sbinfo->stat_lock);
-- }
--
-- inode = new_inode(sb);
-- if (inode) {
-- inode->i_mode = mode;
-- inode->i_uid = current->fsuid;
-- inode->i_gid = current->fsgid;
-- inode->i_blocks = 0;
-- inode->i_mapping->a_ops = &shmem_aops;
-- inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
-- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-- inode->i_generation = get_seconds();
-- info = SHMEM_I(inode);
-- memset(info, 0, (char *)inode - (char *)info);
-- spin_lock_init(&info->lock);
-- INIT_LIST_HEAD(&info->swaplist);
--
-- switch (mode & S_IFMT) {
-- default:
-- inode->i_op = &shmem_special_inode_operations;
-- init_special_inode(inode, mode, dev);
-- break;
-- case S_IFREG:
-- inode->i_op = &shmem_inode_operations;
-- inode->i_fop = &shmem_file_operations;
-- mpol_shared_policy_init(&info->policy, sbinfo->policy,
-- &sbinfo->policy_nodes);
-- break;
-- case S_IFDIR:
-- inc_nlink(inode);
-- /* Some things misbehave if size == 0 on a directory */
-- inode->i_size = 2 * BOGO_DIRENT_SIZE;
-- inode->i_op = &shmem_dir_inode_operations;
-- inode->i_fop = &simple_dir_operations;
-- break;
-- case S_IFLNK:
-- /*
-- * Must not load anything in the rbtree,
-- * mpol_free_shared_policy will not be called.
-- */
-- mpol_shared_policy_init(&info->policy, MPOL_DEFAULT,
-- NULL);
-- break;
-- }
-- } else if (sbinfo->max_inodes) {
-- spin_lock(&sbinfo->stat_lock);
-- sbinfo->free_inodes++;
-- spin_unlock(&sbinfo->stat_lock);
-- }
-- return inode;
--}
--
--#ifdef CONFIG_TMPFS
--static const struct inode_operations shmem_symlink_inode_operations;
--static const struct inode_operations shmem_symlink_inline_operations;
--
--/*
-- * Normally tmpfs makes no use of shmem_prepare_write, but it
-- * lets a tmpfs file be used read-write below the loop driver.
-- */
--static int
--shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
--{
-- struct inode *inode = page->mapping->host;
-- return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
--}
--
--static ssize_t
--shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
--{
-- struct inode *inode = file->f_path.dentry->d_inode;
-- loff_t pos;
-- unsigned long written;
-- ssize_t err;
--
-- if ((ssize_t) count < 0)
-- return -EINVAL;
--
-- if (!access_ok(VERIFY_READ, buf, count))
-- return -EFAULT;
--
-- mutex_lock(&inode->i_mutex);
--
-- pos = *ppos;
-- written = 0;
--
-- err = generic_write_checks(file, &pos, &count, 0);
-- if (err || !count)
-- goto out;
--
-- err = remove_suid(file->f_path.dentry);
-- if (err)
-- goto out;
--
-- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
--
-- do {
-- struct page *page = NULL;
-- unsigned long bytes, index, offset;
-- char *kaddr;
-- int left;
--
-- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-- index = pos >> PAGE_CACHE_SHIFT;
-- bytes = PAGE_CACHE_SIZE - offset;
-- if (bytes > count)
-- bytes = count;
--
-- /*
-- * We don't hold page lock across copy from user -
-- * what would it guard against? - so no deadlock here.
-- * But it still may be a good idea to prefault below.
-- */
--
-- err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
-- if (err)
-- break;
--
-- left = bytes;
-- if (PageHighMem(page)) {
-- volatile unsigned char dummy;
-- __get_user(dummy, buf);
-- __get_user(dummy, buf + bytes - 1);
--
-- kaddr = kmap_atomic(page, KM_USER0);
-- left = __copy_from_user_inatomic(kaddr + offset,
-- buf, bytes);
-- kunmap_atomic(kaddr, KM_USER0);
-- }
-- if (left) {
-- kaddr = kmap(page);
-- left = __copy_from_user(kaddr + offset, buf, bytes);
-- kunmap(page);
-- }
--
-- written += bytes;
-- count -= bytes;
-- pos += bytes;
-- buf += bytes;
-- if (pos > inode->i_size)
-- i_size_write(inode, pos);
--
-- flush_dcache_page(page);
-- set_page_dirty(page);
-- mark_page_accessed(page);
-- page_cache_release(page);
--
-- if (left) {
-- pos -= left;
-- written -= left;
-- err = -EFAULT;
-- break;
-- }
--
-- /*
-- * Our dirty pages are not counted in nr_dirty,
-- * and we do not attempt to balance dirty pages.
-- */
--
-- cond_resched();
-- } while (count);
--
-- *ppos = pos;
-- if (written)
-- err = written;
--out:
-- mutex_unlock(&inode->i_mutex);
-- return err;
--}
--
--static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
--{
-- struct inode *inode = filp->f_path.dentry->d_inode;
-- struct address_space *mapping = inode->i_mapping;
-- unsigned long index, offset;
--
-- index = *ppos >> PAGE_CACHE_SHIFT;
-- offset = *ppos & ~PAGE_CACHE_MASK;
--
-- for (;;) {
-- struct page *page = NULL;
-- unsigned long end_index, nr, ret;
-- loff_t i_size = i_size_read(inode);
--
-- end_index = i_size >> PAGE_CACHE_SHIFT;
-- if (index > end_index)
-- break;
-- if (index == end_index) {
-- nr = i_size & ~PAGE_CACHE_MASK;
-- if (nr <= offset)
-- break;
-- }
--
-- desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
-- if (desc->error) {
-- if (desc->error == -EINVAL)
-- desc->error = 0;
-- break;
-- }
--
-- /*
-- * We must evaluate after, since reads (unlike writes)
-- * are called without i_mutex protection against truncate
-- */
-- nr = PAGE_CACHE_SIZE;
-- i_size = i_size_read(inode);
-- end_index = i_size >> PAGE_CACHE_SHIFT;
-- if (index == end_index) {
-- nr = i_size & ~PAGE_CACHE_MASK;
-- if (nr <= offset) {
-- if (page)
-- page_cache_release(page);
-- break;
-- }
-- }
-- nr -= offset;
--
-- if (page) {
-- /*
-- * If users can be writing to this page using arbitrary
-- * virtual addresses, take care about potential aliasing
-- * before reading the page on the kernel side.
-- */
-- if (mapping_writably_mapped(mapping))
-- flush_dcache_page(page);
-- /*
-- * Mark the page accessed if we read the beginning.
-- */
-- if (!offset)
-- mark_page_accessed(page);
-- } else {
-- page = ZERO_PAGE(0);
-- page_cache_get(page);
-- }
--
-- /*
-- * Ok, we have the page, and it's up-to-date, so
-- * now we can copy it to user space...
-- *
-- * The actor routine returns how many bytes were actually used..
-- * NOTE! This may not be the same as how much of a user buffer
-- * we filled up (we may be padding etc), so we can only update
-- * "pos" here (the actor routine has to update the user buffer
-- * pointers and the remaining count).
-- */
-- ret = actor(desc, page, offset, nr);
-- offset += ret;
-- index += offset >> PAGE_CACHE_SHIFT;
-- offset &= ~PAGE_CACHE_MASK;
--
-- page_cache_release(page);
-- if (ret != nr || !desc->count)
-- break;
--
-- cond_resched();
-- }
--
-- *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
-- file_accessed(filp);
--}
--
--static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
--{
-- read_descriptor_t desc;
--
-- if ((ssize_t) count < 0)
-- return -EINVAL;
-- if (!access_ok(VERIFY_WRITE, buf, count))
-- return -EFAULT;
-- if (!count)
-- return 0;
--
-- desc.written = 0;
-- desc.count = count;
-- desc.arg.buf = buf;
-- desc.error = 0;
--
-- do_shmem_file_read(filp, ppos, &desc, file_read_actor);
-- if (desc.written)
-- return desc.written;
-- return desc.error;
--}
--
--static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
-- size_t count, read_actor_t actor, void *target)
--{
-- read_descriptor_t desc;
--
-- if (!count)
-- return 0;
--
-- desc.written = 0;
-- desc.count = count;
-- desc.arg.data = target;
-- desc.error = 0;
--
-- do_shmem_file_read(in_file, ppos, &desc, actor);
-- if (desc.written)
-- return desc.written;
-- return desc.error;
--}
--
--static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
--{
-- struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
--
-- buf->f_type = TMPFS_MAGIC;
-- buf->f_bsize = PAGE_CACHE_SIZE;
-- buf->f_namelen = NAME_MAX;
-- spin_lock(&sbinfo->stat_lock);
-- if (sbinfo->max_blocks) {
-- buf->f_blocks = sbinfo->max_blocks;
-- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
-- }
-- if (sbinfo->max_inodes) {
-- buf->f_files = sbinfo->max_inodes;
-- buf->f_ffree = sbinfo->free_inodes;
-- }
-- /* else leave those fields 0 like simple_statfs */
-- spin_unlock(&sbinfo->stat_lock);
-- return 0;
--}
--
--/*
-- * File creation. Allocate an inode, and we're done..
-- */
--static int
--shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
--{
-- struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
-- int error = -ENOSPC;
--
-- if (inode) {
-- error = security_inode_init_security(inode, dir, NULL, NULL,
-- NULL);
-- if (error) {
-- if (error != -EOPNOTSUPP) {
-- iput(inode);
-- return error;
-- }
-- }
-- error = shmem_acl_init(inode, dir);
-- if (error) {
-- iput(inode);
-- return error;
-- }
-- if (dir->i_mode & S_ISGID) {
-- inode->i_gid = dir->i_gid;
-- if (S_ISDIR(mode))
-- inode->i_mode |= S_ISGID;
-- }
-- dir->i_size += BOGO_DIRENT_SIZE;
-- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-- d_instantiate(dentry, inode);
-- dget(dentry); /* Extra count - pin the dentry in core */
-- }
-- return error;
--}
--
--static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
--{
-- int error;
--
-- if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
-- return error;
-- inc_nlink(dir);
-- return 0;
--}
--
--static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
-- struct nameidata *nd)
--{
-- return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
--}
--
--/*
-- * Link a file..
-- */
--static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
--{
-- struct inode *inode = old_dentry->d_inode;
-- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
--
-- /*
-- * No ordinary (disk based) filesystem counts links as inodes;
-- * but each new link needs a new dentry, pinning lowmem, and
-- * tmpfs dentries cannot be pruned until they are unlinked.
-- */
-- if (sbinfo->max_inodes) {
-- spin_lock(&sbinfo->stat_lock);
-- if (!sbinfo->free_inodes) {
-- spin_unlock(&sbinfo->stat_lock);
-- return -ENOSPC;
-- }
-- sbinfo->free_inodes--;
-- spin_unlock(&sbinfo->stat_lock);
-- }
--
-- dir->i_size += BOGO_DIRENT_SIZE;
-- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-- inc_nlink(inode);
-- atomic_inc(&inode->i_count); /* New dentry reference */
-- dget(dentry); /* Extra pinning count for the created dentry */
-- d_instantiate(dentry, inode);
-- return 0;
--}
--
--static int shmem_unlink(struct inode *dir, struct dentry *dentry)
--{
-- struct inode *inode = dentry->d_inode;
--
-- if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) {
-- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-- if (sbinfo->max_inodes) {
-- spin_lock(&sbinfo->stat_lock);
-- sbinfo->free_inodes++;
-- spin_unlock(&sbinfo->stat_lock);
-- }
-- }
--
-- dir->i_size -= BOGO_DIRENT_SIZE;
-- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-- drop_nlink(inode);
-- dput(dentry); /* Undo the count from "create" - this does all the work */
-- return 0;
--}
--
--static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
--{
-- if (!simple_empty(dentry))
-- return -ENOTEMPTY;
--
-- drop_nlink(dentry->d_inode);
-- drop_nlink(dir);
-- return shmem_unlink(dir, dentry);
--}
--
--/*
-- * The VFS layer already does all the dentry stuff for rename,
-- * we just have to decrement the usage count for the target if
-- * it exists so that the VFS layer correctly free's it when it
-- * gets overwritten.
-- */
--static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
--{
-- struct inode *inode = old_dentry->d_inode;
-- int they_are_dirs = S_ISDIR(inode->i_mode);
--
-- if (!simple_empty(new_dentry))
-- return -ENOTEMPTY;
--
-- if (new_dentry->d_inode) {
-- (void) shmem_unlink(new_dir, new_dentry);
-- if (they_are_dirs)
-- drop_nlink(old_dir);
-- } else if (they_are_dirs) {
-- drop_nlink(old_dir);
-- inc_nlink(new_dir);
-- }
--
-- old_dir->i_size -= BOGO_DIRENT_SIZE;
-- new_dir->i_size += BOGO_DIRENT_SIZE;
-- old_dir->i_ctime = old_dir->i_mtime =
-- new_dir->i_ctime = new_dir->i_mtime =
-- inode->i_ctime = CURRENT_TIME;
-- return 0;
--}
--
--static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
--{
-- int error;
-- int len;
-- struct inode *inode;
-- struct page *page = NULL;
-- char *kaddr;
-- struct shmem_inode_info *info;
--
-- len = strlen(symname) + 1;
-- if (len > PAGE_CACHE_SIZE)
-- return -ENAMETOOLONG;
--
-- inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
-- if (!inode)
-- return -ENOSPC;
--
-- error = security_inode_init_security(inode, dir, NULL, NULL,
-- NULL);
-- if (error) {
-- if (error != -EOPNOTSUPP) {
-- iput(inode);
-- return error;
-- }
-- error = 0;
-- }
--
-- info = SHMEM_I(inode);
-- inode->i_size = len-1;
-- if (len <= (char *)inode - (char *)info) {
-- /* do it inline */
-- memcpy(info, symname, len);
-- inode->i_op = &shmem_symlink_inline_operations;
-- } else {
-- error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
-- if (error) {
-- iput(inode);
-- return error;
-- }
-- inode->i_op = &shmem_symlink_inode_operations;
-- kaddr = kmap_atomic(page, KM_USER0);
-- memcpy(kaddr, symname, len);
-- kunmap_atomic(kaddr, KM_USER0);
-- set_page_dirty(page);
-- page_cache_release(page);
-- }
-- if (dir->i_mode & S_ISGID)
-- inode->i_gid = dir->i_gid;
-- dir->i_size += BOGO_DIRENT_SIZE;
-- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-- d_instantiate(dentry, inode);
-- dget(dentry);
-- return 0;
--}
--
--static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
--{
-- nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
-- return NULL;
--}
--
--static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
--{
-- struct page *page = NULL;
-- int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
-- nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
-- return page;
--}
--
--static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
--{
-- if (!IS_ERR(nd_get_link(nd))) {
-- struct page *page = cookie;
-- kunmap(page);
-- mark_page_accessed(page);
-- page_cache_release(page);
-- }
--}
--
--static const struct inode_operations shmem_symlink_inline_operations = {
-- .readlink = generic_readlink,
-- .follow_link = shmem_follow_link_inline,
--};
--
--static const struct inode_operations shmem_symlink_inode_operations = {
-- .truncate = shmem_truncate,
-- .readlink = generic_readlink,
-- .follow_link = shmem_follow_link,
-- .put_link = shmem_put_link,
--};
--
--#ifdef CONFIG_TMPFS_POSIX_ACL
--/**
-- * Superblocks without xattr inode operations will get security.* xattr
-- * support from the VFS "for free". As soon as we have any other xattrs
-- * like ACLs, we also need to implement the security.* handlers at
-- * filesystem level, though.
-- */
--
--static size_t shmem_xattr_security_list(struct inode *inode, char *list,
-- size_t list_len, const char *name,
-- size_t name_len)
--{
-- return security_inode_listsecurity(inode, list, list_len);
--}
--
--static int shmem_xattr_security_get(struct inode *inode, const char *name,
-- void *buffer, size_t size)
--{
-- if (strcmp(name, "") == 0)
-- return -EINVAL;
-- return security_inode_getsecurity(inode, name, buffer, size,
-- -EOPNOTSUPP);
--}
--
--static int shmem_xattr_security_set(struct inode *inode, const char *name,
-- const void *value, size_t size, int flags)
--{
-- if (strcmp(name, "") == 0)
-- return -EINVAL;
-- return security_inode_setsecurity(inode, name, value, size, flags);
--}
--
--static struct xattr_handler shmem_xattr_security_handler = {
-- .prefix = XATTR_SECURITY_PREFIX,
-- .list = shmem_xattr_security_list,
-- .get = shmem_xattr_security_get,
-- .set = shmem_xattr_security_set,
--};
--
--static struct xattr_handler *shmem_xattr_handlers[] = {
-- &shmem_xattr_acl_access_handler,
-- &shmem_xattr_acl_default_handler,
-- &shmem_xattr_security_handler,
-- NULL
--};
--#endif
--
--static struct dentry *shmem_get_parent(struct dentry *child)
--{
-- return ERR_PTR(-ESTALE);
--}
--
--static int shmem_match(struct inode *ino, void *vfh)
--{
-- __u32 *fh = vfh;
-- __u64 inum = fh[2];
-- inum = (inum << 32) | fh[1];
-- return ino->i_ino == inum && fh[0] == ino->i_generation;
--}
--
--static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh)
--{
-- struct dentry *de = NULL;
-- struct inode *inode;
-- __u32 *fh = vfh;
-- __u64 inum = fh[2];
-- inum = (inum << 32) | fh[1];
--
-- inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh);
-- if (inode) {
-- de = d_find_alias(inode);
-- iput(inode);
-- }
--
-- return de? de: ERR_PTR(-ESTALE);
--}
--
--static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh,
-- int len, int type,
-- int (*acceptable)(void *context, struct dentry *de),
-- void *context)
--{
-- if (len < 3)
-- return ERR_PTR(-ESTALE);
--
-- return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable,
-- context);
--}
--
--static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
-- int connectable)
--{
-- struct inode *inode = dentry->d_inode;
--
-- if (*len < 3)
-- return 255;
--
-- if (hlist_unhashed(&inode->i_hash)) {
-- /* Unfortunately insert_inode_hash is not idempotent,
-- * so as we hash inodes here rather than at creation
-- * time, we need a lock to ensure we only try
-- * to do it once
-- */
-- static DEFINE_SPINLOCK(lock);
-- spin_lock(&lock);
-- if (hlist_unhashed(&inode->i_hash))
-- __insert_inode_hash(inode,
-- inode->i_ino + inode->i_generation);
-- spin_unlock(&lock);
-- }
--
-- fh[0] = inode->i_generation;
-- fh[1] = inode->i_ino;
-- fh[2] = ((__u64)inode->i_ino) >> 32;
--
-- *len = 3;
-- return 1;
--}
--
--static struct export_operations shmem_export_ops = {
-- .get_parent = shmem_get_parent,
-- .get_dentry = shmem_get_dentry,
-- .encode_fh = shmem_encode_fh,
-- .decode_fh = shmem_decode_fh,
--};
--
--static int shmem_parse_options(char *options, int *mode, uid_t *uid,
-- gid_t *gid, unsigned long *blocks, unsigned long *inodes,
-- int *policy, nodemask_t *policy_nodes)
--{
-- char *this_char, *value, *rest;
--
-- while (options != NULL) {
-- this_char = options;
-- for (;;) {
-- /*
-- * NUL-terminate this option: unfortunately,
-- * mount options form a comma-separated list,
-- * but mpol's nodelist may also contain commas.
-- */
-- options = strchr(options, ',');
-- if (options == NULL)
-- break;
-- options++;
-- if (!isdigit(*options)) {
-- options[-1] = '\0';
-- break;
-- }
-- }
-- if (!*this_char)
-- continue;
-- if ((value = strchr(this_char,'=')) != NULL) {
-- *value++ = 0;
-- } else {
-- printk(KERN_ERR
-- "tmpfs: No value for mount option '%s'\n",
-- this_char);
-- return 1;
-- }
--
-- if (!strcmp(this_char,"size")) {
-- unsigned long long size;
-- size = memparse(value,&rest);
-- if (*rest == '%') {
-- size <<= PAGE_SHIFT;
-- size *= totalram_pages;
-- do_div(size, 100);
-- rest++;
-- }
-- if (*rest)
-- goto bad_val;
-- *blocks = size >> PAGE_CACHE_SHIFT;
-- } else if (!strcmp(this_char,"nr_blocks")) {
-- *blocks = memparse(value,&rest);
-- if (*rest)
-- goto bad_val;
-- } else if (!strcmp(this_char,"nr_inodes")) {
-- *inodes = memparse(value,&rest);
-- if (*rest)
-- goto bad_val;
-- } else if (!strcmp(this_char,"mode")) {
-- if (!mode)
-- continue;
-- *mode = simple_strtoul(value,&rest,8);
-- if (*rest)
-- goto bad_val;
-- } else if (!strcmp(this_char,"uid")) {
-- if (!uid)
-- continue;
-- *uid = simple_strtoul(value,&rest,0);
-- if (*rest)
-- goto bad_val;
-- } else if (!strcmp(this_char,"gid")) {
-- if (!gid)
-- continue;
-- *gid = simple_strtoul(value,&rest,0);
-- if (*rest)
-- goto bad_val;
-- } else if (!strcmp(this_char,"mpol")) {
-- if (shmem_parse_mpol(value,policy,policy_nodes))
-- goto bad_val;
-- } else {
-- printk(KERN_ERR "tmpfs: Bad mount option %s\n",
-- this_char);
-- return 1;
-- }
-- }
-- return 0;
--
--bad_val:
-- printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
-- value, this_char);
-- return 1;
--
--}
--
--static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
--{
-- struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
-- unsigned long max_blocks = sbinfo->max_blocks;
-- unsigned long max_inodes = sbinfo->max_inodes;
-- int policy = sbinfo->policy;
-- nodemask_t policy_nodes = sbinfo->policy_nodes;
-- unsigned long blocks;
-- unsigned long inodes;
-- int error = -EINVAL;
--
-- if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks,
-- &max_inodes, &policy, &policy_nodes))
-- return error;
--
-- spin_lock(&sbinfo->stat_lock);
-- blocks = sbinfo->max_blocks - sbinfo->free_blocks;
-- inodes = sbinfo->max_inodes - sbinfo->free_inodes;
-- if (max_blocks < blocks)
-- goto out;
-- if (max_inodes < inodes)
-- goto out;
-- /*
-- * Those tests also disallow limited->unlimited while any are in
-- * use, so i_blocks will always be zero when max_blocks is zero;
-- * but we must separately disallow unlimited->limited, because
-- * in that case we have no record of how much is already in use.
-- */
-- if (max_blocks && !sbinfo->max_blocks)
-- goto out;
-- if (max_inodes && !sbinfo->max_inodes)
-- goto out;
--
-- error = 0;
-- sbinfo->max_blocks = max_blocks;
-- sbinfo->free_blocks = max_blocks - blocks;
-- sbinfo->max_inodes = max_inodes;
-- sbinfo->free_inodes = max_inodes - inodes;
-- sbinfo->policy = policy;
-- sbinfo->policy_nodes = policy_nodes;
--out:
-- spin_unlock(&sbinfo->stat_lock);
-- return error;
--}
--#endif
--
--static void shmem_put_super(struct super_block *sb)
--{
-- kfree(sb->s_fs_info);
-- sb->s_fs_info = NULL;
--}
--
--static int shmem_fill_super(struct super_block *sb,
-- void *data, int silent)
--{
-- struct inode *inode;
-- struct dentry *root;
-- int mode = S_IRWXUGO | S_ISVTX;
-- uid_t uid = current->fsuid;
-- gid_t gid = current->fsgid;
-- int err = -ENOMEM;
-- struct shmem_sb_info *sbinfo;
-- unsigned long blocks = 0;
-- unsigned long inodes = 0;
-- int policy = MPOL_DEFAULT;
-- nodemask_t policy_nodes = node_online_map;
--
--#ifdef CONFIG_TMPFS
-- /*
-- * Per default we only allow half of the physical ram per
-- * tmpfs instance, limiting inodes to one per page of lowmem;
-- * but the internal instance is left unlimited.
-- */
-- if (!(sb->s_flags & MS_NOUSER)) {
-- blocks = totalram_pages / 2;
-- inodes = totalram_pages - totalhigh_pages;
-- if (inodes > blocks)
-- inodes = blocks;
-- if (shmem_parse_options(data, &mode, &uid, &gid, &blocks,
-- &inodes, &policy, &policy_nodes))
-- return -EINVAL;
-- }
-- sb->s_export_op = &shmem_export_ops;
--#else
-- sb->s_flags |= MS_NOUSER;
--#endif
--
-- /* Round up to L1_CACHE_BYTES to resist false sharing */
-- sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info),
-- L1_CACHE_BYTES), GFP_KERNEL);
-- if (!sbinfo)
-- return -ENOMEM;
--
-- spin_lock_init(&sbinfo->stat_lock);
-- sbinfo->max_blocks = blocks;
-- sbinfo->free_blocks = blocks;
-- sbinfo->max_inodes = inodes;
-- sbinfo->free_inodes = inodes;
-- sbinfo->policy = policy;
-- sbinfo->policy_nodes = policy_nodes;
--
-- sb->s_fs_info = sbinfo;
-- sb->s_maxbytes = SHMEM_MAX_BYTES;
-- sb->s_blocksize = PAGE_CACHE_SIZE;
-- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-- sb->s_magic = TMPFS_MAGIC;
-- sb->s_op = &shmem_ops;
-- sb->s_time_gran = 1;
--#ifdef CONFIG_TMPFS_POSIX_ACL
-- sb->s_xattr = shmem_xattr_handlers;
-- sb->s_flags |= MS_POSIXACL;
--#endif
--
-- inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
-- if (!inode)
-- goto failed;
-- inode->i_uid = uid;
-- inode->i_gid = gid;
-- root = d_alloc_root(inode);
-- if (!root)
-- goto failed_iput;
-- sb->s_root = root;
-- return 0;
--
--failed_iput:
-- iput(inode);
--failed:
-- shmem_put_super(sb);
-- return err;
--}
--
--static struct kmem_cache *shmem_inode_cachep;
--
--static struct inode *shmem_alloc_inode(struct super_block *sb)
--{
-- struct shmem_inode_info *p;
-- p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
-- if (!p)
-- return NULL;
-- return &p->vfs_inode;
--}
--
--static void shmem_destroy_inode(struct inode *inode)
--{
-- if ((inode->i_mode & S_IFMT) == S_IFREG) {
-- /* only struct inode is valid if it's an inline symlink */
-- mpol_free_shared_policy(&SHMEM_I(inode)->policy);
-- }
-- shmem_acl_destroy_inode(inode);
-- kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
--}
--
--static void init_once(void *foo, struct kmem_cache *cachep,
-- unsigned long flags)
--{
-- struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
--
-- inode_init_once(&p->vfs_inode);
--#ifdef CONFIG_TMPFS_POSIX_ACL
-- p->i_acl = NULL;
-- p->i_default_acl = NULL;
--#endif
--}
--
--static int init_inodecache(void)
--{
-- shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
-- sizeof(struct shmem_inode_info),
-- 0, 0, init_once, NULL);
-- if (shmem_inode_cachep == NULL)
-- return -ENOMEM;
-- return 0;
--}
--
--static void destroy_inodecache(void)
--{
-- kmem_cache_destroy(shmem_inode_cachep);
--}
--
--static const struct address_space_operations shmem_aops = {
-- .writepage = shmem_writepage,
-- .set_page_dirty = __set_page_dirty_no_writeback,
--#ifdef CONFIG_TMPFS
-- .prepare_write = shmem_prepare_write,
-- .commit_write = simple_commit_write,
--#endif
-- .migratepage = migrate_page,
--};
--
--static const struct file_operations shmem_file_operations = {
-- .mmap = shmem_mmap,
--#ifdef CONFIG_TMPFS
-- .llseek = generic_file_llseek,
-- .read = shmem_file_read,
-- .write = shmem_file_write,
-- .fsync = simple_sync_file,
-- .sendfile = shmem_file_sendfile,
--#endif
--};
--
--static const struct inode_operations shmem_inode_operations = {
-- .truncate = shmem_truncate,
-- .setattr = shmem_notify_change,
-- .truncate_range = shmem_truncate_range,
--#ifdef CONFIG_TMPFS_POSIX_ACL
-- .setxattr = generic_setxattr,
-- .getxattr = generic_getxattr,
-- .listxattr = generic_listxattr,
-- .removexattr = generic_removexattr,
-- .permission = shmem_permission,
--#endif
--
--};
--
--static const struct inode_operations shmem_dir_inode_operations = {
--#ifdef CONFIG_TMPFS
-- .create = shmem_create,
-- .lookup = simple_lookup,
-- .link = shmem_link,
-- .unlink = shmem_unlink,
-- .symlink = shmem_symlink,
-- .mkdir = shmem_mkdir,
-- .rmdir = shmem_rmdir,
-- .mknod = shmem_mknod,
-- .rename = shmem_rename,
--#endif
--#ifdef CONFIG_TMPFS_POSIX_ACL
-- .setattr = shmem_notify_change,
-- .setxattr = generic_setxattr,
-- .getxattr = generic_getxattr,
-- .listxattr = generic_listxattr,
-- .removexattr = generic_removexattr,
-- .permission = shmem_permission,
--#endif
--};
--
--static const struct inode_operations shmem_special_inode_operations = {
--#ifdef CONFIG_TMPFS_POSIX_ACL
-- .setattr = shmem_notify_change,
-- .setxattr = generic_setxattr,
-- .getxattr = generic_getxattr,
-- .listxattr = generic_listxattr,
-- .removexattr = generic_removexattr,
-- .permission = shmem_permission,
--#endif
--};
--
--static const struct super_operations shmem_ops = {
-- .alloc_inode = shmem_alloc_inode,
-- .destroy_inode = shmem_destroy_inode,
--#ifdef CONFIG_TMPFS
-- .statfs = shmem_statfs,
-- .remount_fs = shmem_remount_fs,
--#endif
-- .delete_inode = shmem_delete_inode,
-- .drop_inode = generic_delete_inode,
-- .put_super = shmem_put_super,
--};
--
--static struct vm_operations_struct shmem_vm_ops = {
-- .nopage = shmem_nopage,
-- .populate = shmem_populate,
--#ifdef CONFIG_NUMA
-- .set_policy = shmem_set_policy,
-- .get_policy = shmem_get_policy,
--#endif
--};
--
--
--static int shmem_get_sb(struct file_system_type *fs_type,
-- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
--{
-- return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
--}
--
--static struct file_system_type tmpfs_fs_type = {
-- .owner = THIS_MODULE,
-- .name = "tmpfs",
-- .get_sb = shmem_get_sb,
-- .kill_sb = kill_litter_super,
--};
--static struct vfsmount *shm_mnt;
--
--static int __init init_tmpfs(void)
--{
-- int error;
--
-- error = init_inodecache();
-- if (error)
-- goto out3;
--
-- error = register_filesystem(&tmpfs_fs_type);
-- if (error) {
-- printk(KERN_ERR "Could not register tmpfs\n");
-- goto out2;
-- }
--
-- shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
-- tmpfs_fs_type.name, NULL);
-- if (IS_ERR(shm_mnt)) {
-- error = PTR_ERR(shm_mnt);
-- printk(KERN_ERR "Could not kern_mount tmpfs\n");
-- goto out1;
-- }
-- return 0;
--
--out1:
-- unregister_filesystem(&tmpfs_fs_type);
--out2:
-- destroy_inodecache();
--out3:
-- shm_mnt = ERR_PTR(error);
-- return error;
--}
--module_init(init_tmpfs)
--
--/*
-- * shmem_file_setup - get an unlinked file living in tmpfs
-- *
-- * @name: name for dentry (to be seen in /proc/<pid>/maps
-- * @size: size to be set for the file
-- *
-- */
--struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
--{
-- int error;
-- struct file *file;
-- struct inode *inode;
-- struct dentry *dentry, *root;
-- struct qstr this;
--
-- if (IS_ERR(shm_mnt))
-- return (void *)shm_mnt;
--
-- if (size < 0 || size > SHMEM_MAX_BYTES)
-- return ERR_PTR(-EINVAL);
--
-- if (shmem_acct_size(flags, size))
-- return ERR_PTR(-ENOMEM);
--
-- error = -ENOMEM;
-- this.name = name;
-- this.len = strlen(name);
-- this.hash = 0; /* will go */
-- root = shm_mnt->mnt_root;
-- dentry = d_alloc(root, &this);
-- if (!dentry)
-- goto put_memory;
--
-- error = -ENFILE;
-- file = get_empty_filp();
-- if (!file)
-- goto put_dentry;
--
-- error = -ENOSPC;
-- inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
-- if (!inode)
-- goto close_file;
--
-- SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
-- d_instantiate(dentry, inode);
-- inode->i_size = size;
-- inode->i_nlink = 0; /* It is unlinked */
-- file->f_path.mnt = mntget(shm_mnt);
-- file->f_path.dentry = dentry;
-- file->f_mapping = inode->i_mapping;
-- file->f_op = &shmem_file_operations;
-- file->f_mode = FMODE_WRITE | FMODE_READ;
-- return file;
--
--close_file:
-- put_filp(file);
--put_dentry:
-- dput(dentry);
--put_memory:
-- shmem_unacct_size(flags, size);
-- return ERR_PTR(error);
--}
--
--/*
-- * shmem_zero_setup - setup a shared anonymous mapping
-- *
-- * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
-- */
--int shmem_zero_setup(struct vm_area_struct *vma)
--{
-- struct file *file;
-- loff_t size = vma->vm_end - vma->vm_start;
--
-- file = shmem_file_setup("dev/zero", size, vma->vm_flags);
-- if (IS_ERR(file))
-- return PTR_ERR(file);
--
-- if (vma->vm_file)
-- fput(vma->vm_file);
-- vma->vm_file = file;
-- vma->vm_ops = &shmem_vm_ops;
-- return 0;
--}
diff -Nurb linux-2.6.22-570/mm/slab.c linux-2.6.22-590/mm/slab.c
--- linux-2.6.22-570/mm/slab.c 2008-03-20 13:25:46.000000000 -0400
+++ linux-2.6.22-590/mm/slab.c 2008-03-20 13:28:03.000000000 -0400
-EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
-diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig linux-2.6.22-590/net/ipv4/tcp_ipv4.c.orig
---- linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig 2008-03-20 13:25:40.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/tcp_ipv4.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,2483 +0,0 @@
--/*
-- * INET An implementation of the TCP/IP protocol suite for the LINUX
-- * operating system. INET is implemented using the BSD Socket
-- * interface as the means of communication with the user level.
-- *
-- * Implementation of the Transmission Control Protocol(TCP).
-- *
-- * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
-- *
-- * IPv4 specific functions
-- *
-- *
-- * code split from:
-- * linux/ipv4/tcp.c
-- * linux/ipv4/tcp_input.c
-- * linux/ipv4/tcp_output.c
-- *
-- * See tcp.c for author information
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--/*
-- * Changes:
-- * David S. Miller : New socket lookup architecture.
-- * This code is dedicated to John Dyson.
-- * David S. Miller : Change semantics of established hash,
-- * half is devoted to TIME_WAIT sockets
-- * and the rest go in the other half.
-- * Andi Kleen : Add support for syncookies and fixed
-- * some bugs: ip options weren't passed to
-- * the TCP layer, missed a check for an
-- * ACK bit.
-- * Andi Kleen : Implemented fast path mtu discovery.
-- * Fixed many serious bugs in the
-- * request_sock handling and moved
-- * most of it into the af independent code.
-- * Added tail drop and some other bugfixes.
-- * Added new listen semantics.
-- * Mike McLagan : Routing by source
-- * Juan Jose Ciarlante: ip_dynaddr bits
-- * Andi Kleen: various fixes.
-- * Vitaly E. Lavrov : Transparent proxy revived after year
-- * coma.
-- * Andi Kleen : Fix new listen.
-- * Andi Kleen : Fix accept error reporting.
-- * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
-- * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
-- * a single port at the same time.
-- */
--
--
--#include <linux/types.h>
--#include <linux/fcntl.h>
--#include <linux/module.h>
--#include <linux/random.h>
--#include <linux/cache.h>
--#include <linux/jhash.h>
--#include <linux/init.h>
--#include <linux/times.h>
--
--#include <net/icmp.h>
--#include <net/inet_hashtables.h>
--#include <net/tcp.h>
--#include <net/transp_v6.h>
--#include <net/ipv6.h>
--#include <net/inet_common.h>
--#include <net/timewait_sock.h>
--#include <net/xfrm.h>
--#include <net/netdma.h>
--
--#include <linux/inet.h>
--#include <linux/ipv6.h>
--#include <linux/stddef.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--
--#include <linux/crypto.h>
--#include <linux/scatterlist.h>
--
--int sysctl_tcp_tw_reuse __read_mostly;
--int sysctl_tcp_low_latency __read_mostly;
--
--/* Check TCP sequence numbers in ICMP packets. */
--#define ICMP_MIN_LENGTH 8
--
--/* Socket used for sending RSTs */
--static struct socket *tcp_socket __read_mostly;
--
--void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
--
--#ifdef CONFIG_TCP_MD5SIG
--static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
-- __be32 addr);
--static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-- __be32 saddr, __be32 daddr,
-- struct tcphdr *th, int protocol,
-- int tcplen);
--#endif
--
--struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
-- .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
-- .lhash_users = ATOMIC_INIT(0),
-- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
--};
--
--static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
--{
-- return inet_csk_get_port(&tcp_hashinfo, sk, snum,
-- inet_csk_bind_conflict);
--}
--
--static void tcp_v4_hash(struct sock *sk)
--{
-- inet_hash(&tcp_hashinfo, sk);
--}
--
--void tcp_unhash(struct sock *sk)
--{
-- inet_unhash(&tcp_hashinfo, sk);
--}
--
--static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
--{
-- return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr,
-- tcp_hdr(skb)->dest,
-- tcp_hdr(skb)->source);
--}
--
--int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
--{
-- const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- /* With PAWS, it is safe from the viewpoint
-- of data integrity. Even without PAWS it is safe provided sequence
-- spaces do not overlap i.e. at data rates <= 80Mbit/sec.
--
-- Actually, the idea is close to VJ's one, only timestamp cache is
-- held not per host, but per port pair and TW bucket is used as state
-- holder.
--
-- If TW bucket has been already destroyed we fall back to VJ's scheme
-- and use initial timestamp retrieved from peer table.
-- */
-- if (tcptw->tw_ts_recent_stamp &&
-- (twp == NULL || (sysctl_tcp_tw_reuse &&
-- get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
-- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-- if (tp->write_seq == 0)
-- tp->write_seq = 1;
-- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
-- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-- sock_hold(sktw);
-- return 1;
-- }
--
-- return 0;
--}
--
--EXPORT_SYMBOL_GPL(tcp_twsk_unique);
--
--/* This will initiate an outgoing connection. */
--int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
-- struct rtable *rt;
-- __be32 daddr, nexthop;
-- int tmp;
-- int err;
--
-- if (addr_len < sizeof(struct sockaddr_in))
-- return -EINVAL;
--
-- if (usin->sin_family != AF_INET)
-- return -EAFNOSUPPORT;
--
-- nexthop = daddr = usin->sin_addr.s_addr;
-- if (inet->opt && inet->opt->srr) {
-- if (!daddr)
-- return -EINVAL;
-- nexthop = inet->opt->faddr;
-- }
--
-- tmp = ip_route_connect(&rt, nexthop, inet->saddr,
-- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
-- IPPROTO_TCP,
-- inet->sport, usin->sin_port, sk, 1);
-- if (tmp < 0) {
-- if (tmp == -ENETUNREACH)
-- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-- return tmp;
-- }
--
-- if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
-- ip_rt_put(rt);
-- return -ENETUNREACH;
-- }
--
-- if (!inet->opt || !inet->opt->srr)
-- daddr = rt->rt_dst;
--
-- if (!inet->saddr)
-- inet->saddr = rt->rt_src;
-- inet->rcv_saddr = inet->saddr;
--
-- if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
-- /* Reset inherited state */
-- tp->rx_opt.ts_recent = 0;
-- tp->rx_opt.ts_recent_stamp = 0;
-- tp->write_seq = 0;
-- }
--
-- if (tcp_death_row.sysctl_tw_recycle &&
-- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
-- struct inet_peer *peer = rt_get_peer(rt);
-- /*
-- * VJ's idea. We save last timestamp seen from
-- * the destination in peer table, when entering state
-- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
-- * when trying new connection.
-- */
-- if (peer != NULL &&
-- peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
-- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-- tp->rx_opt.ts_recent = peer->tcp_ts;
-- }
-- }
--
-- inet->dport = usin->sin_port;
-- inet->daddr = daddr;
--
-- inet_csk(sk)->icsk_ext_hdr_len = 0;
-- if (inet->opt)
-- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
--
-- tp->rx_opt.mss_clamp = 536;
--
-- /* Socket identity is still unknown (sport may be zero).
-- * However we set state to SYN-SENT and not releasing socket
-- * lock select source port, enter ourselves into the hash tables and
-- * complete initialization after this.
-- */
-- tcp_set_state(sk, TCP_SYN_SENT);
-- err = inet_hash_connect(&tcp_death_row, sk);
-- if (err)
-- goto failure;
--
-- err = ip_route_newports(&rt, IPPROTO_TCP,
-- inet->sport, inet->dport, sk);
-- if (err)
-- goto failure;
--
-- /* OK, now commit destination to socket. */
-- sk->sk_gso_type = SKB_GSO_TCPV4;
-- sk_setup_caps(sk, &rt->u.dst);
--
-- if (!tp->write_seq)
-- tp->write_seq = secure_tcp_sequence_number(inet->saddr,
-- inet->daddr,
-- inet->sport,
-- usin->sin_port);
--
-- inet->id = tp->write_seq ^ jiffies;
--
-- err = tcp_connect(sk);
-- rt = NULL;
-- if (err)
-- goto failure;
--
-- return 0;
--
--failure:
-- /*
-- * This unhashes the socket and releases the local port,
-- * if necessary.
-- */
-- tcp_set_state(sk, TCP_CLOSE);
-- ip_rt_put(rt);
-- sk->sk_route_caps = 0;
-- inet->dport = 0;
-- return err;
--}
--
--/*
-- * This routine does path mtu discovery as defined in RFC1191.
-- */
--static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
--{
-- struct dst_entry *dst;
-- struct inet_sock *inet = inet_sk(sk);
--
-- /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
-- * send out by Linux are always <576bytes so they should go through
-- * unfragmented).
-- */
-- if (sk->sk_state == TCP_LISTEN)
-- return;
--
-- /* We don't check in the destentry if pmtu discovery is forbidden
-- * on this route. We just assume that no packet_to_big packets
-- * are send back when pmtu discovery is not active.
-- * There is a small race when the user changes this flag in the
-- * route, but I think that's acceptable.
-- */
-- if ((dst = __sk_dst_check(sk, 0)) == NULL)
-- return;
--
-- dst->ops->update_pmtu(dst, mtu);
--
-- /* Something is about to be wrong... Remember soft error
-- * for the case, if this connection will not able to recover.
-- */
-- if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
-- sk->sk_err_soft = EMSGSIZE;
--
-- mtu = dst_mtu(dst);
--
-- if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-- inet_csk(sk)->icsk_pmtu_cookie > mtu) {
-- tcp_sync_mss(sk, mtu);
--
-- /* Resend the TCP packet because it's
-- * clear that the old packet has been
-- * dropped. This is the new "fast" path mtu
-- * discovery.
-- */
-- tcp_simple_retransmit(sk);
-- } /* else let the usual retransmit timer handle it */
--}
--
--/*
-- * This routine is called by the ICMP module when it gets some
-- * sort of error condition. If err < 0 then the socket should
-- * be closed and the error returned to the user. If err > 0
-- * it's just the icmp type << 8 | icmp code. After adjustment
-- * header points to the first 8 bytes of the tcp header. We need
-- * to find the appropriate port.
-- *
-- * The locking strategy used here is very "optimistic". When
-- * someone else accesses the socket the ICMP is just dropped
-- * and for some paths there is no check at all.
-- * A more general error queue to queue errors for later handling
-- * is probably better.
-- *
-- */
--
--void tcp_v4_err(struct sk_buff *skb, u32 info)
--{
-- struct iphdr *iph = (struct iphdr *)skb->data;
-- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
-- struct tcp_sock *tp;
-- struct inet_sock *inet;
-- const int type = icmp_hdr(skb)->type;
-- const int code = icmp_hdr(skb)->code;
-- struct sock *sk;
-- __u32 seq;
-- int err;
--
-- if (skb->len < (iph->ihl << 2) + 8) {
-- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
-- return;
-- }
--
-- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
-- th->source, inet_iif(skb));
-- if (!sk) {
-- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
-- return;
-- }
-- if (sk->sk_state == TCP_TIME_WAIT) {
-- inet_twsk_put(inet_twsk(sk));
-- return;
-- }
--
-- bh_lock_sock(sk);
-- /* If too many ICMPs get dropped on busy
-- * servers this needs to be solved differently.
-- */
-- if (sock_owned_by_user(sk))
-- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
--
-- if (sk->sk_state == TCP_CLOSE)
-- goto out;
--
-- tp = tcp_sk(sk);
-- seq = ntohl(th->seq);
-- if (sk->sk_state != TCP_LISTEN &&
-- !between(seq, tp->snd_una, tp->snd_nxt)) {
-- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
-- goto out;
-- }
--
-- switch (type) {
-- case ICMP_SOURCE_QUENCH:
-- /* Just silently ignore these. */
-- goto out;
-- case ICMP_PARAMETERPROB:
-- err = EPROTO;
-- break;
-- case ICMP_DEST_UNREACH:
-- if (code > NR_ICMP_UNREACH)
-- goto out;
--
-- if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
-- if (!sock_owned_by_user(sk))
-- do_pmtu_discovery(sk, iph, info);
-- goto out;
-- }
--
-- err = icmp_err_convert[code].errno;
-- break;
-- case ICMP_TIME_EXCEEDED:
-- err = EHOSTUNREACH;
-- break;
-- default:
-- goto out;
-- }
--
-- switch (sk->sk_state) {
-- struct request_sock *req, **prev;
-- case TCP_LISTEN:
-- if (sock_owned_by_user(sk))
-- goto out;
--
-- req = inet_csk_search_req(sk, &prev, th->dest,
-- iph->daddr, iph->saddr);
-- if (!req)
-- goto out;
--
-- /* ICMPs are not backlogged, hence we cannot get
-- an established socket here.
-- */
-- BUG_TRAP(!req->sk);
--
-- if (seq != tcp_rsk(req)->snt_isn) {
-- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
-- goto out;
-- }
--
-- /*
-- * Still in SYN_RECV, just remove it silently.
-- * There is no good way to pass the error to the newly
-- * created socket, and POSIX does not want network
-- * errors returned from accept().
-- */
-- inet_csk_reqsk_queue_drop(sk, req, prev);
-- goto out;
--
-- case TCP_SYN_SENT:
-- case TCP_SYN_RECV: /* Cannot happen.
-- It can f.e. if SYNs crossed.
-- */
-- if (!sock_owned_by_user(sk)) {
-- sk->sk_err = err;
--
-- sk->sk_error_report(sk);
--
-- tcp_done(sk);
-- } else {
-- sk->sk_err_soft = err;
-- }
-- goto out;
-- }
--
-- /* If we've already connected we will keep trying
-- * until we time out, or the user gives up.
-- *
-- * rfc1122 4.2.3.9 allows to consider as hard errors
-- * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
-- * but it is obsoleted by pmtu discovery).
-- *
-- * Note, that in modern internet, where routing is unreliable
-- * and in each dark corner broken firewalls sit, sending random
-- * errors ordered by their masters even this two messages finally lose
-- * their original sense (even Linux sends invalid PORT_UNREACHs)
-- *
-- * Now we are in compliance with RFCs.
-- * --ANK (980905)
-- */
--
-- inet = inet_sk(sk);
-- if (!sock_owned_by_user(sk) && inet->recverr) {
-- sk->sk_err = err;
-- sk->sk_error_report(sk);
-- } else { /* Only an error on timeout */
-- sk->sk_err_soft = err;
-- }
--
--out:
-- bh_unlock_sock(sk);
-- sock_put(sk);
--}
--
--/* This routine computes an IPv4 TCP checksum. */
--void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct tcphdr *th = tcp_hdr(skb);
--
-- if (skb->ip_summed == CHECKSUM_PARTIAL) {
-- th->check = ~tcp_v4_check(len, inet->saddr,
-- inet->daddr, 0);
-- skb->csum_start = skb_transport_header(skb) - skb->head;
-- skb->csum_offset = offsetof(struct tcphdr, check);
-- } else {
-- th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
-- csum_partial((char *)th,
-- th->doff << 2,
-- skb->csum));
-- }
--}
--
--int tcp_v4_gso_send_check(struct sk_buff *skb)
--{
-- const struct iphdr *iph;
-- struct tcphdr *th;
--
-- if (!pskb_may_pull(skb, sizeof(*th)))
-- return -EINVAL;
--
-- iph = ip_hdr(skb);
-- th = tcp_hdr(skb);
--
-- th->check = 0;
-- th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
-- skb->csum_start = skb_transport_header(skb) - skb->head;
-- skb->csum_offset = offsetof(struct tcphdr, check);
-- skb->ip_summed = CHECKSUM_PARTIAL;
-- return 0;
--}
--
--/*
-- * This routine will send an RST to the other tcp.
-- *
-- * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
-- * for reset.
-- * Answer: if a packet caused RST, it is not for a socket
-- * existing in our system, if it is matched to a socket,
-- * it is just duplicate segment or bug in other side's TCP.
-- * So that we build reply only basing on parameters
-- * arrived with segment.
-- * Exception: precedence violation. We do not implement it in any case.
-- */
--
--static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
--{
-- struct tcphdr *th = tcp_hdr(skb);
-- struct {
-- struct tcphdr th;
--#ifdef CONFIG_TCP_MD5SIG
-- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
--#endif
-- } rep;
-- struct ip_reply_arg arg;
--#ifdef CONFIG_TCP_MD5SIG
-- struct tcp_md5sig_key *key;
--#endif
--
-- /* Never send a reset in response to a reset. */
-- if (th->rst)
-- return;
--
-- if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
-- return;
--
-- /* Swap the send and the receive. */
-- memset(&rep, 0, sizeof(rep));
-- rep.th.dest = th->source;
-- rep.th.source = th->dest;
-- rep.th.doff = sizeof(struct tcphdr) / 4;
-- rep.th.rst = 1;
--
-- if (th->ack) {
-- rep.th.seq = th->ack_seq;
-- } else {
-- rep.th.ack = 1;
-- rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
-- skb->len - (th->doff << 2));
-- }
--
-- memset(&arg, 0, sizeof(arg));
-- arg.iov[0].iov_base = (unsigned char *)&rep;
-- arg.iov[0].iov_len = sizeof(rep.th);
--
--#ifdef CONFIG_TCP_MD5SIG
-- key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
-- if (key) {
-- rep.opt[0] = htonl((TCPOPT_NOP << 24) |
-- (TCPOPT_NOP << 16) |
-- (TCPOPT_MD5SIG << 8) |
-- TCPOLEN_MD5SIG);
-- /* Update length and the length the header thinks exists */
-- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
-- rep.th.doff = arg.iov[0].iov_len / 4;
--
-- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
-- key,
-- ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr,
-- &rep.th, IPPROTO_TCP,
-- arg.iov[0].iov_len);
-- }
--#endif
-- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr, /* XXX */
-- sizeof(struct tcphdr), IPPROTO_TCP, 0);
-- arg.csumoffset = offsetof(struct tcphdr, check) / 2;
--
-- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
--
-- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
-- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
--}
--
--/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
-- outside socket context is ugly, certainly. What can I do?
-- */
--
--static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
-- struct sk_buff *skb, u32 seq, u32 ack,
-- u32 win, u32 ts)
--{
-- struct tcphdr *th = tcp_hdr(skb);
-- struct {
-- struct tcphdr th;
-- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
--#ifdef CONFIG_TCP_MD5SIG
-- + (TCPOLEN_MD5SIG_ALIGNED >> 2)
--#endif
-- ];
-- } rep;
-- struct ip_reply_arg arg;
--#ifdef CONFIG_TCP_MD5SIG
-- struct tcp_md5sig_key *key;
-- struct tcp_md5sig_key tw_key;
--#endif
--
-- memset(&rep.th, 0, sizeof(struct tcphdr));
-- memset(&arg, 0, sizeof(arg));
--
-- arg.iov[0].iov_base = (unsigned char *)&rep;
-- arg.iov[0].iov_len = sizeof(rep.th);
-- if (ts) {
-- rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
-- (TCPOPT_TIMESTAMP << 8) |
-- TCPOLEN_TIMESTAMP);
-- rep.opt[1] = htonl(tcp_time_stamp);
-- rep.opt[2] = htonl(ts);
-- arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
-- }
--
-- /* Swap the send and the receive. */
-- rep.th.dest = th->source;
-- rep.th.source = th->dest;
-- rep.th.doff = arg.iov[0].iov_len / 4;
-- rep.th.seq = htonl(seq);
-- rep.th.ack_seq = htonl(ack);
-- rep.th.ack = 1;
-- rep.th.window = htons(win);
--
--#ifdef CONFIG_TCP_MD5SIG
-- /*
-- * The SKB holds an imcoming packet, but may not have a valid ->sk
-- * pointer. This is especially the case when we're dealing with a
-- * TIME_WAIT ack, because the sk structure is long gone, and only
-- * the tcp_timewait_sock remains. So the md5 key is stashed in that
-- * structure, and we use it in preference. I believe that (twsk ||
-- * skb->sk) holds true, but we program defensively.
-- */
-- if (!twsk && skb->sk) {
-- key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
-- } else if (twsk && twsk->tw_md5_keylen) {
-- tw_key.key = twsk->tw_md5_key;
-- tw_key.keylen = twsk->tw_md5_keylen;
-- key = &tw_key;
-- } else
-- key = NULL;
--
-- if (key) {
-- int offset = (ts) ? 3 : 0;
--
-- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
-- (TCPOPT_NOP << 16) |
-- (TCPOPT_MD5SIG << 8) |
-- TCPOLEN_MD5SIG);
-- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
-- rep.th.doff = arg.iov[0].iov_len/4;
--
-- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
-- key,
-- ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr,
-- &rep.th, IPPROTO_TCP,
-- arg.iov[0].iov_len);
-- }
--#endif
-- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr, /* XXX */
-- arg.iov[0].iov_len, IPPROTO_TCP, 0);
-- arg.csumoffset = offsetof(struct tcphdr, check) / 2;
-- if (twsk)
-- arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
--
-- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
--
-- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
--}
--
--static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
--{
-- struct inet_timewait_sock *tw = inet_twsk(sk);
-- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
--
-- tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
-- tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-- tcptw->tw_ts_recent);
--
-- inet_twsk_put(tw);
--}
--
--static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
-- struct request_sock *req)
--{
-- tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
-- tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
-- req->ts_recent);
--}
--
--/*
-- * Send a SYN-ACK after having received an ACK.
-- * This still operates on a request_sock only, not on a big
-- * socket.
-- */
--static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
-- struct dst_entry *dst)
--{
-- const struct inet_request_sock *ireq = inet_rsk(req);
-- int err = -1;
-- struct sk_buff * skb;
--
-- /* First, grab a route. */
-- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
-- goto out;
--
-- skb = tcp_make_synack(sk, dst, req);
--
-- if (skb) {
-- struct tcphdr *th = tcp_hdr(skb);
--
-- th->check = tcp_v4_check(skb->len,
-- ireq->loc_addr,
-- ireq->rmt_addr,
-- csum_partial((char *)th, skb->len,
-- skb->csum));
--
-- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
-- ireq->rmt_addr,
-- ireq->opt);
-- err = net_xmit_eval(err);
-- }
--
--out:
-- dst_release(dst);
-- return err;
--}
--
--/*
-- * IPv4 request_sock destructor.
-- */
--static void tcp_v4_reqsk_destructor(struct request_sock *req)
--{
-- kfree(inet_rsk(req)->opt);
--}
--
--#ifdef CONFIG_SYN_COOKIES
--static void syn_flood_warning(struct sk_buff *skb)
--{
-- static unsigned long warntime;
--
-- if (time_after(jiffies, (warntime + HZ * 60))) {
-- warntime = jiffies;
-- printk(KERN_INFO
-- "possible SYN flooding on port %d. Sending cookies.\n",
-- ntohs(tcp_hdr(skb)->dest));
-- }
--}
--#endif
--
--/*
-- * Save and compile IPv4 options into the request_sock if needed.
-- */
--static struct ip_options *tcp_v4_save_options(struct sock *sk,
-- struct sk_buff *skb)
--{
-- struct ip_options *opt = &(IPCB(skb)->opt);
-- struct ip_options *dopt = NULL;
--
-- if (opt && opt->optlen) {
-- int opt_size = optlength(opt);
-- dopt = kmalloc(opt_size, GFP_ATOMIC);
-- if (dopt) {
-- if (ip_options_echo(dopt, skb)) {
-- kfree(dopt);
-- dopt = NULL;
-- }
-- }
-- }
-- return dopt;
--}
--
--#ifdef CONFIG_TCP_MD5SIG
--/*
-- * RFC2385 MD5 checksumming requires a mapping of
-- * IP address->MD5 Key.
-- * We need to maintain these in the sk structure.
-- */
--
--/* Find the Key structure for an address. */
--static struct tcp_md5sig_key *
-- tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
-- int i;
--
-- if (!tp->md5sig_info || !tp->md5sig_info->entries4)
-- return NULL;
-- for (i = 0; i < tp->md5sig_info->entries4; i++) {
-- if (tp->md5sig_info->keys4[i].addr == addr)
-- return &tp->md5sig_info->keys4[i].base;
-- }
-- return NULL;
--}
--
--struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
-- struct sock *addr_sk)
--{
-- return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
--}
--
--EXPORT_SYMBOL(tcp_v4_md5_lookup);
--
--static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
-- struct request_sock *req)
--{
-- return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
--}
--
--/* This can be called on a newly created socket, from other files */
--int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
-- u8 *newkey, u8 newkeylen)
--{
-- /* Add Key to the list */
-- struct tcp4_md5sig_key *key;
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct tcp4_md5sig_key *keys;
--
-- key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
-- if (key) {
-- /* Pre-existing entry - just update that one. */
-- kfree(key->base.key);
-- key->base.key = newkey;
-- key->base.keylen = newkeylen;
-- } else {
-- struct tcp_md5sig_info *md5sig;
--
-- if (!tp->md5sig_info) {
-- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
-- GFP_ATOMIC);
-- if (!tp->md5sig_info) {
-- kfree(newkey);
-- return -ENOMEM;
-- }
-- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-- }
-- if (tcp_alloc_md5sig_pool() == NULL) {
-- kfree(newkey);
-- return -ENOMEM;
-- }
-- md5sig = tp->md5sig_info;
--
-- if (md5sig->alloced4 == md5sig->entries4) {
-- keys = kmalloc((sizeof(*keys) *
-- (md5sig->entries4 + 1)), GFP_ATOMIC);
-- if (!keys) {
-- kfree(newkey);
-- tcp_free_md5sig_pool();
-- return -ENOMEM;
-- }
--
-- if (md5sig->entries4)
-- memcpy(keys, md5sig->keys4,
-- sizeof(*keys) * md5sig->entries4);
--
-- /* Free old key list, and reference new one */
-- if (md5sig->keys4)
-- kfree(md5sig->keys4);
-- md5sig->keys4 = keys;
-- md5sig->alloced4++;
-- }
-- md5sig->entries4++;
-- md5sig->keys4[md5sig->entries4 - 1].addr = addr;
-- md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
-- md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
-- }
-- return 0;
--}
--
--EXPORT_SYMBOL(tcp_v4_md5_do_add);
--
--static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
-- u8 *newkey, u8 newkeylen)
--{
-- return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
-- newkey, newkeylen);
--}
--
--int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
-- int i;
--
-- for (i = 0; i < tp->md5sig_info->entries4; i++) {
-- if (tp->md5sig_info->keys4[i].addr == addr) {
-- /* Free the key */
-- kfree(tp->md5sig_info->keys4[i].base.key);
-- tp->md5sig_info->entries4--;
--
-- if (tp->md5sig_info->entries4 == 0) {
-- kfree(tp->md5sig_info->keys4);
-- tp->md5sig_info->keys4 = NULL;
-- tp->md5sig_info->alloced4 = 0;
-- } else if (tp->md5sig_info->entries4 != i) {
-- /* Need to do some manipulation */
-- memcpy(&tp->md5sig_info->keys4[i],
-- &tp->md5sig_info->keys4[i+1],
-- (tp->md5sig_info->entries4 - i) *
-- sizeof(struct tcp4_md5sig_key));
-- }
-- tcp_free_md5sig_pool();
-- return 0;
-- }
-- }
-- return -ENOENT;
--}
--
--EXPORT_SYMBOL(tcp_v4_md5_do_del);
--
--static void tcp_v4_clear_md5_list(struct sock *sk)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- /* Free each key, then the set of key keys,
-- * the crypto element, and then decrement our
-- * hold on the last resort crypto.
-- */
-- if (tp->md5sig_info->entries4) {
-- int i;
-- for (i = 0; i < tp->md5sig_info->entries4; i++)
-- kfree(tp->md5sig_info->keys4[i].base.key);
-- tp->md5sig_info->entries4 = 0;
-- tcp_free_md5sig_pool();
-- }
-- if (tp->md5sig_info->keys4) {
-- kfree(tp->md5sig_info->keys4);
-- tp->md5sig_info->keys4 = NULL;
-- tp->md5sig_info->alloced4 = 0;
-- }
--}
--
--static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
-- int optlen)
--{
-- struct tcp_md5sig cmd;
-- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
-- u8 *newkey;
--
-- if (optlen < sizeof(cmd))
-- return -EINVAL;
--
-- if (copy_from_user(&cmd, optval, sizeof(cmd)))
-- return -EFAULT;
--
-- if (sin->sin_family != AF_INET)
-- return -EINVAL;
--
-- if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
-- if (!tcp_sk(sk)->md5sig_info)
-- return -ENOENT;
-- return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
-- }
--
-- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
-- return -EINVAL;
--
-- if (!tcp_sk(sk)->md5sig_info) {
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
--
-- if (!p)
-- return -EINVAL;
--
-- tp->md5sig_info = p;
-- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-- }
--
-- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
-- if (!newkey)
-- return -ENOMEM;
-- return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
-- newkey, cmd.tcpm_keylen);
--}
--
--static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-- __be32 saddr, __be32 daddr,
-- struct tcphdr *th, int protocol,
-- int tcplen)
--{
-- struct scatterlist sg[4];
-- __u16 data_len;
-- int block = 0;
-- __sum16 old_checksum;
-- struct tcp_md5sig_pool *hp;
-- struct tcp4_pseudohdr *bp;
-- struct hash_desc *desc;
-- int err;
-- unsigned int nbytes = 0;
--
-- /*
-- * Okay, so RFC2385 is turned on for this connection,
-- * so we need to generate the MD5 hash for the packet now.
-- */
--
-- hp = tcp_get_md5sig_pool();
-- if (!hp)
-- goto clear_hash_noput;
--
-- bp = &hp->md5_blk.ip4;
-- desc = &hp->md5_desc;
--
-- /*
-- * 1. the TCP pseudo-header (in the order: source IP address,
-- * destination IP address, zero-padded protocol number, and
-- * segment length)
-- */
-- bp->saddr = saddr;
-- bp->daddr = daddr;
-- bp->pad = 0;
-- bp->protocol = protocol;
-- bp->len = htons(tcplen);
-- sg_set_buf(&sg[block++], bp, sizeof(*bp));
-- nbytes += sizeof(*bp);
--
-- /* 2. the TCP header, excluding options, and assuming a
-- * checksum of zero/
-- */
-- old_checksum = th->check;
-- th->check = 0;
-- sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
-- nbytes += sizeof(struct tcphdr);
--
-- /* 3. the TCP segment data (if any) */
-- data_len = tcplen - (th->doff << 2);
-- if (data_len > 0) {
-- unsigned char *data = (unsigned char *)th + (th->doff << 2);
-- sg_set_buf(&sg[block++], data, data_len);
-- nbytes += data_len;
-- }
--
-- /* 4. an independently-specified key or password, known to both
-- * TCPs and presumably connection-specific
-- */
-- sg_set_buf(&sg[block++], key->key, key->keylen);
-- nbytes += key->keylen;
--
-- /* Now store the Hash into the packet */
-- err = crypto_hash_init(desc);
-- if (err)
-- goto clear_hash;
-- err = crypto_hash_update(desc, sg, nbytes);
-- if (err)
-- goto clear_hash;
-- err = crypto_hash_final(desc, md5_hash);
-- if (err)
-- goto clear_hash;
--
-- /* Reset header, and free up the crypto */
-- tcp_put_md5sig_pool();
-- th->check = old_checksum;
--
--out:
-- return 0;
--clear_hash:
-- tcp_put_md5sig_pool();
--clear_hash_noput:
-- memset(md5_hash, 0, 16);
-- goto out;
--}
--
--int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-- struct sock *sk,
-- struct dst_entry *dst,
-- struct request_sock *req,
-- struct tcphdr *th, int protocol,
-- int tcplen)
--{
-- __be32 saddr, daddr;
--
-- if (sk) {
-- saddr = inet_sk(sk)->saddr;
-- daddr = inet_sk(sk)->daddr;
-- } else {
-- struct rtable *rt = (struct rtable *)dst;
-- BUG_ON(!rt);
-- saddr = rt->rt_src;
-- daddr = rt->rt_dst;
-- }
-- return tcp_v4_do_calc_md5_hash(md5_hash, key,
-- saddr, daddr,
-- th, protocol, tcplen);
--}
--
--EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
--
--static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
--{
-- /*
-- * This gets called for each TCP segment that arrives
-- * so we want to be efficient.
-- * We have 3 drop cases:
-- * o No MD5 hash and one expected.
-- * o MD5 hash and we're not expecting one.
-- * o MD5 hash and its wrong.
-- */
-- __u8 *hash_location = NULL;
-- struct tcp_md5sig_key *hash_expected;
-- const struct iphdr *iph = ip_hdr(skb);
-- struct tcphdr *th = tcp_hdr(skb);
-- int length = (th->doff << 2) - sizeof(struct tcphdr);
-- int genhash;
-- unsigned char *ptr;
-- unsigned char newhash[16];
--
-- hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
--
-- /*
-- * If the TCP option length is less than the TCP_MD5SIG
-- * option length, then we can shortcut
-- */
-- if (length < TCPOLEN_MD5SIG) {
-- if (hash_expected)
-- return 1;
-- else
-- return 0;
-- }
--
-- /* Okay, we can't shortcut - we have to grub through the options */
-- ptr = (unsigned char *)(th + 1);
-- while (length > 0) {
-- int opcode = *ptr++;
-- int opsize;
--
-- switch (opcode) {
-- case TCPOPT_EOL:
-- goto done_opts;
-- case TCPOPT_NOP:
-- length--;
-- continue;
-- default:
-- opsize = *ptr++;
-- if (opsize < 2)
-- goto done_opts;
-- if (opsize > length)
-- goto done_opts;
--
-- if (opcode == TCPOPT_MD5SIG) {
-- hash_location = ptr;
-- goto done_opts;
-- }
-- }
-- ptr += opsize-2;
-- length -= opsize;
-- }
--done_opts:
-- /* We've parsed the options - do we have a hash? */
-- if (!hash_expected && !hash_location)
-- return 0;
--
-- if (hash_expected && !hash_location) {
-- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
-- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-- NIPQUAD(iph->saddr), ntohs(th->source),
-- NIPQUAD(iph->daddr), ntohs(th->dest));
-- return 1;
-- }
--
-- if (!hash_expected && hash_location) {
-- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
-- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-- NIPQUAD(iph->saddr), ntohs(th->source),
-- NIPQUAD(iph->daddr), ntohs(th->dest));
-- return 1;
-- }
--
-- /* Okay, so this is hash_expected and hash_location -
-- * so we need to calculate the checksum.
-- */
-- genhash = tcp_v4_do_calc_md5_hash(newhash,
-- hash_expected,
-- iph->saddr, iph->daddr,
-- th, sk->sk_protocol,
-- skb->len);
--
-- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
-- if (net_ratelimit()) {
-- printk(KERN_INFO "MD5 Hash failed for "
-- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
-- NIPQUAD(iph->saddr), ntohs(th->source),
-- NIPQUAD(iph->daddr), ntohs(th->dest),
-- genhash ? " tcp_v4_calc_md5_hash failed" : "");
-- }
-- return 1;
-- }
-- return 0;
--}
--
--#endif
--
--struct request_sock_ops tcp_request_sock_ops __read_mostly = {
-- .family = PF_INET,
-- .obj_size = sizeof(struct tcp_request_sock),
-- .rtx_syn_ack = tcp_v4_send_synack,
-- .send_ack = tcp_v4_reqsk_send_ack,
-- .destructor = tcp_v4_reqsk_destructor,
-- .send_reset = tcp_v4_send_reset,
--};
--
--#ifdef CONFIG_TCP_MD5SIG
--static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
-- .md5_lookup = tcp_v4_reqsk_md5_lookup,
--};
--#endif
--
--static struct timewait_sock_ops tcp_timewait_sock_ops = {
-- .twsk_obj_size = sizeof(struct tcp_timewait_sock),
-- .twsk_unique = tcp_twsk_unique,
-- .twsk_destructor= tcp_twsk_destructor,
--};
--
--int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
--{
-- struct inet_request_sock *ireq;
-- struct tcp_options_received tmp_opt;
-- struct request_sock *req;
-- __be32 saddr = ip_hdr(skb)->saddr;
-- __be32 daddr = ip_hdr(skb)->daddr;
-- __u32 isn = TCP_SKB_CB(skb)->when;
-- struct dst_entry *dst = NULL;
--#ifdef CONFIG_SYN_COOKIES
-- int want_cookie = 0;
--#else
--#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
--#endif
--
-- /* Never answer to SYNs send to broadcast or multicast */
-- if (((struct rtable *)skb->dst)->rt_flags &
-- (RTCF_BROADCAST | RTCF_MULTICAST))
-- goto drop;
--
-- /* TW buckets are converted to open requests without
-- * limitations, they conserve resources and peer is
-- * evidently real one.
-- */
-- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
--#ifdef CONFIG_SYN_COOKIES
-- if (sysctl_tcp_syncookies) {
-- want_cookie = 1;
-- } else
--#endif
-- goto drop;
-- }
--
-- /* Accept backlog is full. If we have already queued enough
-- * of warm entries in syn queue, drop request. It is better than
-- * clogging syn queue with openreqs with exponentially increasing
-- * timeout.
-- */
-- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
-- goto drop;
--
-- req = reqsk_alloc(&tcp_request_sock_ops);
-- if (!req)
-- goto drop;
--
--#ifdef CONFIG_TCP_MD5SIG
-- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
--#endif
--
-- tcp_clear_options(&tmp_opt);
-- tmp_opt.mss_clamp = 536;
-- tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
--
-- tcp_parse_options(skb, &tmp_opt, 0);
--
-- if (want_cookie) {
-- tcp_clear_options(&tmp_opt);
-- tmp_opt.saw_tstamp = 0;
-- }
--
-- if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
-- /* Some OSes (unknown ones, but I see them on web server, which
-- * contains information interesting only for windows'
-- * users) do not send their stamp in SYN. It is easy case.
-- * We simply do not advertise TS support.
-- */
-- tmp_opt.saw_tstamp = 0;
-- tmp_opt.tstamp_ok = 0;
-- }
-- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
--
-- tcp_openreq_init(req, &tmp_opt, skb);
--
-- if (security_inet_conn_request(sk, skb, req))
-- goto drop_and_free;
--
-- ireq = inet_rsk(req);
-- ireq->loc_addr = daddr;
-- ireq->rmt_addr = saddr;
-- ireq->opt = tcp_v4_save_options(sk, skb);
-- if (!want_cookie)
-- TCP_ECN_create_request(req, tcp_hdr(skb));
--
-- if (want_cookie) {
--#ifdef CONFIG_SYN_COOKIES
-- syn_flood_warning(skb);
--#endif
-- isn = cookie_v4_init_sequence(sk, skb, &req->mss);
-- } else if (!isn) {
-- struct inet_peer *peer = NULL;
--
-- /* VJ's idea. We save last timestamp seen
-- * from the destination in peer table, when entering
-- * state TIME-WAIT, and check against it before
-- * accepting new connection request.
-- *
-- * If "isn" is not zero, this request hit alive
-- * timewait bucket, so that all the necessary checks
-- * are made in the function processing timewait state.
-- */
-- if (tmp_opt.saw_tstamp &&
-- tcp_death_row.sysctl_tw_recycle &&
-- (dst = inet_csk_route_req(sk, req)) != NULL &&
-- (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
-- peer->v4daddr == saddr) {
-- if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
-- (s32)(peer->tcp_ts - req->ts_recent) >
-- TCP_PAWS_WINDOW) {
-- NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
-- dst_release(dst);
-- goto drop_and_free;
-- }
-- }
-- /* Kill the following clause, if you dislike this way. */
-- else if (!sysctl_tcp_syncookies &&
-- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-- (sysctl_max_syn_backlog >> 2)) &&
-- (!peer || !peer->tcp_ts_stamp) &&
-- (!dst || !dst_metric(dst, RTAX_RTT))) {
-- /* Without syncookies last quarter of
-- * backlog is filled with destinations,
-- * proven to be alive.
-- * It means that we continue to communicate
-- * to destinations, already remembered
-- * to the moment of synflood.
-- */
-- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
-- "request from %u.%u.%u.%u/%u\n",
-- NIPQUAD(saddr),
-- ntohs(tcp_hdr(skb)->source));
-- dst_release(dst);
-- goto drop_and_free;
-- }
--
-- isn = tcp_v4_init_sequence(skb);
-- }
-- tcp_rsk(req)->snt_isn = isn;
--
-- if (tcp_v4_send_synack(sk, req, dst))
-- goto drop_and_free;
--
-- if (want_cookie) {
-- reqsk_free(req);
-- } else {
-- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-- }
-- return 0;
--
--drop_and_free:
-- reqsk_free(req);
--drop:
-- return 0;
--}
--
--
--/*
-- * The three way handshake has completed - we got a valid synack -
-- * now create the new socket.
-- */
--struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
-- struct request_sock *req,
-- struct dst_entry *dst)
--{
-- struct inet_request_sock *ireq;
-- struct inet_sock *newinet;
-- struct tcp_sock *newtp;
-- struct sock *newsk;
--#ifdef CONFIG_TCP_MD5SIG
-- struct tcp_md5sig_key *key;
--#endif
--
-- if (sk_acceptq_is_full(sk))
-- goto exit_overflow;
--
-- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
-- goto exit;
--
-- newsk = tcp_create_openreq_child(sk, req, skb);
-- if (!newsk)
-- goto exit;
--
-- newsk->sk_gso_type = SKB_GSO_TCPV4;
-- sk_setup_caps(newsk, dst);
--
-- newtp = tcp_sk(newsk);
-- newinet = inet_sk(newsk);
-- ireq = inet_rsk(req);
-- newinet->daddr = ireq->rmt_addr;
-- newinet->rcv_saddr = ireq->loc_addr;
-- newinet->saddr = ireq->loc_addr;
-- newinet->opt = ireq->opt;
-- ireq->opt = NULL;
-- newinet->mc_index = inet_iif(skb);
-- newinet->mc_ttl = ip_hdr(skb)->ttl;
-- inet_csk(newsk)->icsk_ext_hdr_len = 0;
-- if (newinet->opt)
-- inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
-- newinet->id = newtp->write_seq ^ jiffies;
--
-- tcp_mtup_init(newsk);
-- tcp_sync_mss(newsk, dst_mtu(dst));
-- newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
-- tcp_initialize_rcv_mss(newsk);
--
--#ifdef CONFIG_TCP_MD5SIG
-- /* Copy over the MD5 key from the original socket */
-- if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
-- /*
-- * We're using one, so create a matching key
-- * on the newsk structure. If we fail to get
-- * memory, then we end up not copying the key
-- * across. Shucks.
-- */
-- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
-- if (newkey != NULL)
-- tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
-- newkey, key->keylen);
-- }
--#endif
--
-- __inet_hash(&tcp_hashinfo, newsk, 0);
-- __inet_inherit_port(&tcp_hashinfo, sk, newsk);
--
-- return newsk;
--
--exit_overflow:
-- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
--exit:
-- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
-- dst_release(dst);
-- return NULL;
--}
--
--static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
--{
-- struct tcphdr *th = tcp_hdr(skb);
-- const struct iphdr *iph = ip_hdr(skb);
-- struct sock *nsk;
-- struct request_sock **prev;
-- /* Find possible connection requests. */
-- struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
-- iph->saddr, iph->daddr);
-- if (req)
-- return tcp_check_req(sk, skb, req, prev);
--
-- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
-- iph->daddr, th->dest, inet_iif(skb));
--
-- if (nsk) {
-- if (nsk->sk_state != TCP_TIME_WAIT) {
-- bh_lock_sock(nsk);
-- return nsk;
-- }
-- inet_twsk_put(inet_twsk(nsk));
-- return NULL;
-- }
--
--#ifdef CONFIG_SYN_COOKIES
-- if (!th->rst && !th->syn && th->ack)
-- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
--#endif
-- return sk;
--}
--
--static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
--{
-- const struct iphdr *iph = ip_hdr(skb);
--
-- if (skb->ip_summed == CHECKSUM_COMPLETE) {
-- if (!tcp_v4_check(skb->len, iph->saddr,
-- iph->daddr, skb->csum)) {
-- skb->ip_summed = CHECKSUM_UNNECESSARY;
-- return 0;
-- }
-- }
--
-- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-- skb->len, IPPROTO_TCP, 0);
--
-- if (skb->len <= 76) {
-- return __skb_checksum_complete(skb);
-- }
-- return 0;
--}
--
--
--/* The socket must have it's spinlock held when we get
-- * here.
-- *
-- * We have a potential double-lock case here, so even when
-- * doing backlog processing we use the BH locking scheme.
-- * This is because we cannot sleep with the original spinlock
-- * held.
-- */
--int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
--{
-- struct sock *rsk;
--#ifdef CONFIG_TCP_MD5SIG
-- /*
-- * We really want to reject the packet as early as possible
-- * if:
-- * o We're expecting an MD5'd packet and this is no MD5 tcp option
-- * o There is an MD5 option and we're not expecting one
-- */
-- if (tcp_v4_inbound_md5_hash(sk, skb))
-- goto discard;
--#endif
--
-- if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
-- TCP_CHECK_TIMER(sk);
-- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
-- rsk = sk;
-- goto reset;
-- }
-- TCP_CHECK_TIMER(sk);
-- return 0;
-- }
--
-- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
-- goto csum_err;
--
-- if (sk->sk_state == TCP_LISTEN) {
-- struct sock *nsk = tcp_v4_hnd_req(sk, skb);
-- if (!nsk)
-- goto discard;
--
-- if (nsk != sk) {
-- if (tcp_child_process(sk, nsk, skb)) {
-- rsk = nsk;
-- goto reset;
-- }
-- return 0;
-- }
-- }
--
-- TCP_CHECK_TIMER(sk);
-- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
-- rsk = sk;
-- goto reset;
-- }
-- TCP_CHECK_TIMER(sk);
-- return 0;
--
--reset:
-- tcp_v4_send_reset(rsk, skb);
--discard:
-- kfree_skb(skb);
-- /* Be careful here. If this function gets more complicated and
-- * gcc suffers from register pressure on the x86, sk (in %ebx)
-- * might be destroyed here. This current version compiles correctly,
-- * but you have been warned.
-- */
-- return 0;
--
--csum_err:
-- TCP_INC_STATS_BH(TCP_MIB_INERRS);
-- goto discard;
--}
--
--/*
-- * From tcp_input.c
-- */
--
--int tcp_v4_rcv(struct sk_buff *skb)
--{
-- const struct iphdr *iph;
-- struct tcphdr *th;
-- struct sock *sk;
-- int ret;
--
-- if (skb->pkt_type != PACKET_HOST)
-- goto discard_it;
--
-- /* Count it even if it's bad */
-- TCP_INC_STATS_BH(TCP_MIB_INSEGS);
--
-- if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
-- goto discard_it;
--
-- th = tcp_hdr(skb);
--
-- if (th->doff < sizeof(struct tcphdr) / 4)
-- goto bad_packet;
-- if (!pskb_may_pull(skb, th->doff * 4))
-- goto discard_it;
--
-- /* An explanation is required here, I think.
-- * Packet length and doff are validated by header prediction,
-- * provided case of th->doff==0 is eliminated.
-- * So, we defer the checks. */
-- if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
-- goto bad_packet;
--
-- th = tcp_hdr(skb);
-- iph = ip_hdr(skb);
-- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
-- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
-- skb->len - th->doff * 4);
-- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-- TCP_SKB_CB(skb)->when = 0;
-- TCP_SKB_CB(skb)->flags = iph->tos;
-- TCP_SKB_CB(skb)->sacked = 0;
--
-- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
-- iph->daddr, th->dest, inet_iif(skb));
-- if (!sk)
-- goto no_tcp_socket;
--
--process:
-- if (sk->sk_state == TCP_TIME_WAIT)
-- goto do_time_wait;
--
-- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
-- goto discard_and_relse;
-- nf_reset(skb);
--
-- if (sk_filter(sk, skb))
-- goto discard_and_relse;
--
-- skb->dev = NULL;
--
-- bh_lock_sock_nested(sk);
-- ret = 0;
-- if (!sock_owned_by_user(sk)) {
--#ifdef CONFIG_NET_DMA
-- struct tcp_sock *tp = tcp_sk(sk);
-- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-- tp->ucopy.dma_chan = get_softnet_dma();
-- if (tp->ucopy.dma_chan)
-- ret = tcp_v4_do_rcv(sk, skb);
-- else
--#endif
-- {
-- if (!tcp_prequeue(sk, skb))
-- ret = tcp_v4_do_rcv(sk, skb);
-- }
-- } else
-- sk_add_backlog(sk, skb);
-- bh_unlock_sock(sk);
--
-- sock_put(sk);
--
-- return ret;
--
--no_tcp_socket:
-- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
-- goto discard_it;
--
-- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
--bad_packet:
-- TCP_INC_STATS_BH(TCP_MIB_INERRS);
-- } else {
-- tcp_v4_send_reset(NULL, skb);
-- }
--
--discard_it:
-- /* Discard frame. */
-- kfree_skb(skb);
-- return 0;
--
--discard_and_relse:
-- sock_put(sk);
-- goto discard_it;
--
--do_time_wait:
-- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-- inet_twsk_put(inet_twsk(sk));
-- goto discard_it;
-- }
--
-- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
-- TCP_INC_STATS_BH(TCP_MIB_INERRS);
-- inet_twsk_put(inet_twsk(sk));
-- goto discard_it;
-- }
-- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
-- case TCP_TW_SYN: {
-- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
-- iph->daddr, th->dest,
-- inet_iif(skb));
-- if (sk2) {
-- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
-- inet_twsk_put(inet_twsk(sk));
-- sk = sk2;
-- goto process;
-- }
-- /* Fall through to ACK */
-- }
-- case TCP_TW_ACK:
-- tcp_v4_timewait_ack(sk, skb);
-- break;
-- case TCP_TW_RST:
-- goto no_tcp_socket;
-- case TCP_TW_SUCCESS:;
-- }
-- goto discard_it;
--}
--
--/* VJ's idea. Save last timestamp seen from this destination
-- * and hold it at least for normal timewait interval to use for duplicate
-- * segment detection in subsequent connections, before they enter synchronized
-- * state.
-- */
--
--int tcp_v4_remember_stamp(struct sock *sk)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
-- struct inet_peer *peer = NULL;
-- int release_it = 0;
--
-- if (!rt || rt->rt_dst != inet->daddr) {
-- peer = inet_getpeer(inet->daddr, 1);
-- release_it = 1;
-- } else {
-- if (!rt->peer)
-- rt_bind_peer(rt, 1);
-- peer = rt->peer;
-- }
--
-- if (peer) {
-- if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
-- peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
-- peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
-- peer->tcp_ts = tp->rx_opt.ts_recent;
-- }
-- if (release_it)
-- inet_putpeer(peer);
-- return 1;
-- }
--
-- return 0;
--}
--
--int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
--{
-- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
--
-- if (peer) {
-- const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
--
-- if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
-- peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
-- peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
-- peer->tcp_ts = tcptw->tw_ts_recent;
-- }
-- inet_putpeer(peer);
-- return 1;
-- }
--
-- return 0;
--}
--
--struct inet_connection_sock_af_ops ipv4_specific = {
-- .queue_xmit = ip_queue_xmit,
-- .send_check = tcp_v4_send_check,
-- .rebuild_header = inet_sk_rebuild_header,
-- .conn_request = tcp_v4_conn_request,
-- .syn_recv_sock = tcp_v4_syn_recv_sock,
-- .remember_stamp = tcp_v4_remember_stamp,
-- .net_header_len = sizeof(struct iphdr),
-- .setsockopt = ip_setsockopt,
-- .getsockopt = ip_getsockopt,
-- .addr2sockaddr = inet_csk_addr2sockaddr,
-- .sockaddr_len = sizeof(struct sockaddr_in),
--#ifdef CONFIG_COMPAT
-- .compat_setsockopt = compat_ip_setsockopt,
-- .compat_getsockopt = compat_ip_getsockopt,
--#endif
--};
--
--#ifdef CONFIG_TCP_MD5SIG
--static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
-- .md5_lookup = tcp_v4_md5_lookup,
-- .calc_md5_hash = tcp_v4_calc_md5_hash,
-- .md5_add = tcp_v4_md5_add_func,
-- .md5_parse = tcp_v4_parse_md5_keys,
--};
--#endif
--
--/* NOTE: A lot of things set to zero explicitly by call to
-- * sk_alloc() so need not be done here.
-- */
--static int tcp_v4_init_sock(struct sock *sk)
--{
-- struct inet_connection_sock *icsk = inet_csk(sk);
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- skb_queue_head_init(&tp->out_of_order_queue);
-- tcp_init_xmit_timers(sk);
-- tcp_prequeue_init(tp);
--
-- icsk->icsk_rto = TCP_TIMEOUT_INIT;
-- tp->mdev = TCP_TIMEOUT_INIT;
--
-- /* So many TCP implementations out there (incorrectly) count the
-- * initial SYN frame in their delayed-ACK and congestion control
-- * algorithms that we must have the following bandaid to talk
-- * efficiently to them. -DaveM
-- */
-- tp->snd_cwnd = 2;
--
-- /* See draft-stevens-tcpca-spec-01 for discussion of the
-- * initialization of these values.
-- */
-- tp->snd_ssthresh = 0x7fffffff; /* Infinity */
-- tp->snd_cwnd_clamp = ~0;
-- tp->mss_cache = 536;
--
-- tp->reordering = sysctl_tcp_reordering;
-- icsk->icsk_ca_ops = &tcp_init_congestion_ops;
--
-- sk->sk_state = TCP_CLOSE;
--
-- sk->sk_write_space = sk_stream_write_space;
-- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
--
-- icsk->icsk_af_ops = &ipv4_specific;
-- icsk->icsk_sync_mss = tcp_sync_mss;
--#ifdef CONFIG_TCP_MD5SIG
-- tp->af_specific = &tcp_sock_ipv4_specific;
--#endif
--
-- sk->sk_sndbuf = sysctl_tcp_wmem[1];
-- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
--
-- atomic_inc(&tcp_sockets_allocated);
--
-- return 0;
--}
--
--int tcp_v4_destroy_sock(struct sock *sk)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- tcp_clear_xmit_timers(sk);
--
-- tcp_cleanup_congestion_control(sk);
--
-- /* Cleanup up the write buffer. */
-- tcp_write_queue_purge(sk);
--
-- /* Cleans up our, hopefully empty, out_of_order_queue. */
-- __skb_queue_purge(&tp->out_of_order_queue);
--
--#ifdef CONFIG_TCP_MD5SIG
-- /* Clean up the MD5 key list, if any */
-- if (tp->md5sig_info) {
-- tcp_v4_clear_md5_list(sk);
-- kfree(tp->md5sig_info);
-- tp->md5sig_info = NULL;
-- }
--#endif
--
--#ifdef CONFIG_NET_DMA
-- /* Cleans up our sk_async_wait_queue */
-- __skb_queue_purge(&sk->sk_async_wait_queue);
--#endif
--
-- /* Clean prequeue, it must be empty really */
-- __skb_queue_purge(&tp->ucopy.prequeue);
--
-- /* Clean up a referenced TCP bind bucket. */
-- if (inet_csk(sk)->icsk_bind_hash)
-- inet_put_port(&tcp_hashinfo, sk);
--
-- /*
-- * If sendmsg cached page exists, toss it.
-- */
-- if (sk->sk_sndmsg_page) {
-- __free_page(sk->sk_sndmsg_page);
-- sk->sk_sndmsg_page = NULL;
-- }
--
-- atomic_dec(&tcp_sockets_allocated);
--
-- return 0;
--}
--
--EXPORT_SYMBOL(tcp_v4_destroy_sock);
--
--#ifdef CONFIG_PROC_FS
--/* Proc filesystem TCP sock list dumping. */
--
--static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
--{
-- return hlist_empty(head) ? NULL :
-- list_entry(head->first, struct inet_timewait_sock, tw_node);
--}
--
--static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
--{
-- return tw->tw_node.next ?
-- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
--}
--
--static void *listening_get_next(struct seq_file *seq, void *cur)
--{
-- struct inet_connection_sock *icsk;
-- struct hlist_node *node;
-- struct sock *sk = cur;
-- struct tcp_iter_state* st = seq->private;
--
-- if (!sk) {
-- st->bucket = 0;
-- sk = sk_head(&tcp_hashinfo.listening_hash[0]);
-- goto get_sk;
-- }
--
-- ++st->num;
--
-- if (st->state == TCP_SEQ_STATE_OPENREQ) {
-- struct request_sock *req = cur;
--
-- icsk = inet_csk(st->syn_wait_sk);
-- req = req->dl_next;
-- while (1) {
-- while (req) {
-- if (req->rsk_ops->family == st->family) {
-- cur = req;
-- goto out;
-- }
-- req = req->dl_next;
-- }
-- if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
-- break;
--get_req:
-- req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
-- }
-- sk = sk_next(st->syn_wait_sk);
-- st->state = TCP_SEQ_STATE_LISTENING;
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- } else {
-- icsk = inet_csk(sk);
-- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- if (reqsk_queue_len(&icsk->icsk_accept_queue))
-- goto start_req;
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- sk = sk_next(sk);
-- }
--get_sk:
-- sk_for_each_from(sk, node) {
-- if (sk->sk_family == st->family) {
-- cur = sk;
-- goto out;
-- }
-- icsk = inet_csk(sk);
-- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
--start_req:
-- st->uid = sock_i_uid(sk);
-- st->syn_wait_sk = sk;
-- st->state = TCP_SEQ_STATE_OPENREQ;
-- st->sbucket = 0;
-- goto get_req;
-- }
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- }
-- if (++st->bucket < INET_LHTABLE_SIZE) {
-- sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
-- goto get_sk;
-- }
-- cur = NULL;
--out:
-- return cur;
--}
--
--static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
--{
-- void *rc = listening_get_next(seq, NULL);
--
-- while (rc && *pos) {
-- rc = listening_get_next(seq, rc);
-- --*pos;
-- }
-- return rc;
--}
--
--static void *established_get_first(struct seq_file *seq)
--{
-- struct tcp_iter_state* st = seq->private;
-- void *rc = NULL;
--
-- for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
-- struct sock *sk;
-- struct hlist_node *node;
-- struct inet_timewait_sock *tw;
--
-- /* We can reschedule _before_ having picked the target: */
-- cond_resched_softirq();
--
-- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
-- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-- if (sk->sk_family != st->family) {
-- continue;
-- }
-- rc = sk;
-- goto out;
-- }
-- st->state = TCP_SEQ_STATE_TIME_WAIT;
-- inet_twsk_for_each(tw, node,
-- &tcp_hashinfo.ehash[st->bucket].twchain) {
-- if (tw->tw_family != st->family) {
-- continue;
-- }
-- rc = tw;
-- goto out;
-- }
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
-- }
--out:
-- return rc;
--}
--
--static void *established_get_next(struct seq_file *seq, void *cur)
--{
-- struct sock *sk = cur;
-- struct inet_timewait_sock *tw;
-- struct hlist_node *node;
-- struct tcp_iter_state* st = seq->private;
--
-- ++st->num;
--
-- if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
-- tw = cur;
-- tw = tw_next(tw);
--get_tw:
-- while (tw && tw->tw_family != st->family) {
-- tw = tw_next(tw);
-- }
-- if (tw) {
-- cur = tw;
-- goto out;
-- }
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
--
-- /* We can reschedule between buckets: */
-- cond_resched_softirq();
--
-- if (++st->bucket < tcp_hashinfo.ehash_size) {
-- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
-- sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
-- } else {
-- cur = NULL;
-- goto out;
-- }
-- } else
-- sk = sk_next(sk);
--
-- sk_for_each_from(sk, node) {
-- if (sk->sk_family == st->family)
-- goto found;
-- }
--
-- st->state = TCP_SEQ_STATE_TIME_WAIT;
-- tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
-- goto get_tw;
--found:
-- cur = sk;
--out:
-- return cur;
--}
--
--static void *established_get_idx(struct seq_file *seq, loff_t pos)
--{
-- void *rc = established_get_first(seq);
--
-- while (rc && pos) {
-- rc = established_get_next(seq, rc);
-- --pos;
-- }
-- return rc;
--}
--
--static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
--{
-- void *rc;
-- struct tcp_iter_state* st = seq->private;
--
-- inet_listen_lock(&tcp_hashinfo);
-- st->state = TCP_SEQ_STATE_LISTENING;
-- rc = listening_get_idx(seq, &pos);
--
-- if (!rc) {
-- inet_listen_unlock(&tcp_hashinfo);
-- local_bh_disable();
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
-- rc = established_get_idx(seq, pos);
-- }
--
-- return rc;
--}
--
--static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
--{
-- struct tcp_iter_state* st = seq->private;
-- st->state = TCP_SEQ_STATE_LISTENING;
-- st->num = 0;
-- return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
--}
--
--static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
--{
-- void *rc = NULL;
-- struct tcp_iter_state* st;
--
-- if (v == SEQ_START_TOKEN) {
-- rc = tcp_get_idx(seq, 0);
-- goto out;
-- }
-- st = seq->private;
--
-- switch (st->state) {
-- case TCP_SEQ_STATE_OPENREQ:
-- case TCP_SEQ_STATE_LISTENING:
-- rc = listening_get_next(seq, v);
-- if (!rc) {
-- inet_listen_unlock(&tcp_hashinfo);
-- local_bh_disable();
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
-- rc = established_get_first(seq);
-- }
-- break;
-- case TCP_SEQ_STATE_ESTABLISHED:
-- case TCP_SEQ_STATE_TIME_WAIT:
-- rc = established_get_next(seq, v);
-- break;
-- }
--out:
-- ++*pos;
-- return rc;
--}
--
--static void tcp_seq_stop(struct seq_file *seq, void *v)
--{
-- struct tcp_iter_state* st = seq->private;
--
-- switch (st->state) {
-- case TCP_SEQ_STATE_OPENREQ:
-- if (v) {
-- struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- }
-- case TCP_SEQ_STATE_LISTENING:
-- if (v != SEQ_START_TOKEN)
-- inet_listen_unlock(&tcp_hashinfo);
-- break;
-- case TCP_SEQ_STATE_TIME_WAIT:
-- case TCP_SEQ_STATE_ESTABLISHED:
-- if (v)
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-- local_bh_enable();
-- break;
-- }
--}
--
--static int tcp_seq_open(struct inode *inode, struct file *file)
--{
-- struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
-- struct seq_file *seq;
-- struct tcp_iter_state *s;
-- int rc;
--
-- if (unlikely(afinfo == NULL))
-- return -EINVAL;
--
-- s = kzalloc(sizeof(*s), GFP_KERNEL);
-- if (!s)
-- return -ENOMEM;
-- s->family = afinfo->family;
-- s->seq_ops.start = tcp_seq_start;
-- s->seq_ops.next = tcp_seq_next;
-- s->seq_ops.show = afinfo->seq_show;
-- s->seq_ops.stop = tcp_seq_stop;
--
-- rc = seq_open(file, &s->seq_ops);
-- if (rc)
-- goto out_kfree;
-- seq = file->private_data;
-- seq->private = s;
--out:
-- return rc;
--out_kfree:
-- kfree(s);
-- goto out;
--}
--
--int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
--{
-- int rc = 0;
-- struct proc_dir_entry *p;
--
-- if (!afinfo)
-- return -EINVAL;
-- afinfo->seq_fops->owner = afinfo->owner;
-- afinfo->seq_fops->open = tcp_seq_open;
-- afinfo->seq_fops->read = seq_read;
-- afinfo->seq_fops->llseek = seq_lseek;
-- afinfo->seq_fops->release = seq_release_private;
--
-- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
-- if (p)
-- p->data = afinfo;
-- else
-- rc = -ENOMEM;
-- return rc;
--}
--
--void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
--{
-- if (!afinfo)
-- return;
-- proc_net_remove(afinfo->name);
-- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
--}
--
--static void get_openreq4(struct sock *sk, struct request_sock *req,
-- char *tmpbuf, int i, int uid)
--{
-- const struct inet_request_sock *ireq = inet_rsk(req);
-- int ttd = req->expires - jiffies;
--
-- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
-- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
-- i,
-- ireq->loc_addr,
-- ntohs(inet_sk(sk)->sport),
-- ireq->rmt_addr,
-- ntohs(ireq->rmt_port),
-- TCP_SYN_RECV,
-- 0, 0, /* could print option size, but that is af dependent. */
-- 1, /* timers active (only the expire timer) */
-- jiffies_to_clock_t(ttd),
-- req->retrans,
-- uid,
-- 0, /* non standard timer */
-- 0, /* open_requests have no inode */
-- atomic_read(&sk->sk_refcnt),
-- req);
--}
--
--static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
--{
-- int timer_active;
-- unsigned long timer_expires;
-- struct tcp_sock *tp = tcp_sk(sk);
-- const struct inet_connection_sock *icsk = inet_csk(sk);
-- struct inet_sock *inet = inet_sk(sk);
-- __be32 dest = inet->daddr;
-- __be32 src = inet->rcv_saddr;
-- __u16 destp = ntohs(inet->dport);
-- __u16 srcp = ntohs(inet->sport);
--
-- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
-- timer_active = 1;
-- timer_expires = icsk->icsk_timeout;
-- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
-- timer_active = 4;
-- timer_expires = icsk->icsk_timeout;
-- } else if (timer_pending(&sk->sk_timer)) {
-- timer_active = 2;
-- timer_expires = sk->sk_timer.expires;
-- } else {
-- timer_active = 0;
-- timer_expires = jiffies;
-- }
--
-- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
-- "%08X %5d %8d %lu %d %p %u %u %u %u %d",
-- i, src, srcp, dest, destp, sk->sk_state,
-- tp->write_seq - tp->snd_una,
-- sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
-- (tp->rcv_nxt - tp->copied_seq),
-- timer_active,
-- jiffies_to_clock_t(timer_expires - jiffies),
-- icsk->icsk_retransmits,
-- sock_i_uid(sk),
-- icsk->icsk_probes_out,
-- sock_i_ino(sk),
-- atomic_read(&sk->sk_refcnt), sk,
-- icsk->icsk_rto,
-- icsk->icsk_ack.ato,
-- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
-- tp->snd_cwnd,
-- tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
--}
--
--static void get_timewait4_sock(struct inet_timewait_sock *tw,
-- char *tmpbuf, int i)
--{
-- __be32 dest, src;
-- __u16 destp, srcp;
-- int ttd = tw->tw_ttd - jiffies;
--
-- if (ttd < 0)
-- ttd = 0;
--
-- dest = tw->tw_daddr;
-- src = tw->tw_rcv_saddr;
-- destp = ntohs(tw->tw_dport);
-- srcp = ntohs(tw->tw_sport);
--
-- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
-- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
-- i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
-- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
-- atomic_read(&tw->tw_refcnt), tw);
--}
--
--#define TMPSZ 150
--
--static int tcp4_seq_show(struct seq_file *seq, void *v)
--{
-- struct tcp_iter_state* st;
-- char tmpbuf[TMPSZ + 1];
--
-- if (v == SEQ_START_TOKEN) {
-- seq_printf(seq, "%-*s\n", TMPSZ - 1,
-- " sl local_address rem_address st tx_queue "
-- "rx_queue tr tm->when retrnsmt uid timeout "
-- "inode");
-- goto out;
-- }
-- st = seq->private;
--
-- switch (st->state) {
-- case TCP_SEQ_STATE_LISTENING:
-- case TCP_SEQ_STATE_ESTABLISHED:
-- get_tcp4_sock(v, tmpbuf, st->num);
-- break;
-- case TCP_SEQ_STATE_OPENREQ:
-- get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
-- break;
-- case TCP_SEQ_STATE_TIME_WAIT:
-- get_timewait4_sock(v, tmpbuf, st->num);
-- break;
-- }
-- seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
--out:
-- return 0;
--}
--
--static struct file_operations tcp4_seq_fops;
--static struct tcp_seq_afinfo tcp4_seq_afinfo = {
-- .owner = THIS_MODULE,
-- .name = "tcp",
-- .family = AF_INET,
-- .seq_show = tcp4_seq_show,
-- .seq_fops = &tcp4_seq_fops,
--};
--
--int __init tcp4_proc_init(void)
--{
-- return tcp_proc_register(&tcp4_seq_afinfo);
--}
--
--void tcp4_proc_exit(void)
--{
-- tcp_proc_unregister(&tcp4_seq_afinfo);
--}
--#endif /* CONFIG_PROC_FS */
--
--struct proto tcp_prot = {
-- .name = "TCP",
-- .owner = THIS_MODULE,
-- .close = tcp_close,
-- .connect = tcp_v4_connect,
-- .disconnect = tcp_disconnect,
-- .accept = inet_csk_accept,
-- .ioctl = tcp_ioctl,
-- .init = tcp_v4_init_sock,
-- .destroy = tcp_v4_destroy_sock,
-- .shutdown = tcp_shutdown,
-- .setsockopt = tcp_setsockopt,
-- .getsockopt = tcp_getsockopt,
-- .recvmsg = tcp_recvmsg,
-- .backlog_rcv = tcp_v4_do_rcv,
-- .hash = tcp_v4_hash,
-- .unhash = tcp_unhash,
-- .get_port = tcp_v4_get_port,
-- .enter_memory_pressure = tcp_enter_memory_pressure,
-- .sockets_allocated = &tcp_sockets_allocated,
-- .orphan_count = &tcp_orphan_count,
-- .memory_allocated = &tcp_memory_allocated,
-- .memory_pressure = &tcp_memory_pressure,
-- .sysctl_mem = sysctl_tcp_mem,
-- .sysctl_wmem = sysctl_tcp_wmem,
-- .sysctl_rmem = sysctl_tcp_rmem,
-- .max_header = MAX_TCP_HEADER,
-- .obj_size = sizeof(struct tcp_sock),
-- .twsk_prot = &tcp_timewait_sock_ops,
-- .rsk_prot = &tcp_request_sock_ops,
--#ifdef CONFIG_COMPAT
-- .compat_setsockopt = compat_tcp_setsockopt,
-- .compat_getsockopt = compat_tcp_getsockopt,
--#endif
--};
--
--void __init tcp_v4_init(struct net_proto_family *ops)
--{
-- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
-- IPPROTO_TCP) < 0)
-- panic("Failed to create the TCP control socket.\n");
--}
--
--EXPORT_SYMBOL(ipv4_specific);
--EXPORT_SYMBOL(tcp_hashinfo);
--EXPORT_SYMBOL(tcp_prot);
--EXPORT_SYMBOL(tcp_unhash);
--EXPORT_SYMBOL(tcp_v4_conn_request);
--EXPORT_SYMBOL(tcp_v4_connect);
--EXPORT_SYMBOL(tcp_v4_do_rcv);
--EXPORT_SYMBOL(tcp_v4_remember_stamp);
--EXPORT_SYMBOL(tcp_v4_send_check);
--EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
--
--#ifdef CONFIG_PROC_FS
--EXPORT_SYMBOL(tcp_proc_register);
--EXPORT_SYMBOL(tcp_proc_unregister);
--#endif
--EXPORT_SYMBOL(sysctl_local_port_range);
--EXPORT_SYMBOL(sysctl_tcp_low_latency);
--
diff -Nurb linux-2.6.22-570/net/ipv4/tcp_output.c linux-2.6.22-590/net/ipv4/tcp_output.c
--- linux-2.6.22-570/net/ipv4/tcp_output.c 2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/net/ipv4/tcp_output.c 2008-03-20 13:28:03.000000000 -0400
}
- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- struct in_device * in_dev = __in_dev_get_rtnl(dev);
- if (in_dev && (dev->flags & IFF_UP)) {
- struct in_ifaddr * ifa;
-@@ -2245,12 +2246,12 @@
-
- /* first try to inherit the link-local address from the link device */
- if (idev->dev->iflink &&
-- (link_dev = __dev_get_by_index(idev->dev->iflink))) {
-+ (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- /* then try to inherit it from any device */
-- for_each_netdev(link_dev) {
-+ for_each_netdev(&init_net, link_dev) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
-@@ -2282,6 +2283,9 @@
- struct inet6_dev *idev = __in6_dev_get(dev);
- int run_pending = 0;
-
-+ if (dev->nd_net != &init_net)
-+ return NOTIFY_DONE;
-+
- switch(event) {
- case NETDEV_REGISTER:
- if (!idev && dev->mtu >= IPV6_MIN_MTU) {
-@@ -2419,7 +2423,7 @@
-
- ASSERT_RTNL();
-
-- if (dev == &loopback_dev && how == 1)
-+ if (dev == &init_net.loopback_dev && how == 1)
- how = 0;
-
- rt6_ifdown(dev);
-@@ -2850,18 +2854,18 @@
-
- int __init if6_proc_init(void)
- {
-- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
-+ if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops))
- return -ENOMEM;
- return 0;
- }
-
- void if6_proc_exit(void)
- {
-- proc_net_remove("if_inet6");
-+ proc_net_remove(&init_net, "if_inet6");
- }
- #endif /* CONFIG_PROC_FS */
-
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- /* Check if address is a home address configured on any interface. */
- int ipv6_chk_home_addr(struct in6_addr *addr)
- {
-@@ -3017,11 +3021,15 @@
- static int
- inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
- {
-+ struct net *net = skb->sk->sk_net;
- struct ifaddrmsg *ifm;
- struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;
-+
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
- if (err < 0)
- return err;
-@@ -3074,6 +3082,7 @@
- static int
- inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
- {
-+ struct net *net = skb->sk->sk_net;
- struct ifaddrmsg *ifm;
- struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
-@@ -3083,6 +3092,9 @@
- u8 ifa_flags;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;
-+
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
- if (err < 0)
- return err;
-@@ -3103,7 +3115,7 @@
- valid_lft = INFINITY_LIFE_TIME;
- }
-
-- dev = __dev_get_by_index(ifm->ifa_index);
-+ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
- if (dev == NULL)
- return -ENODEV;
-
-@@ -3292,7 +3304,7 @@
- s_ip_idx = ip_idx = cb->args[1];
-
- idx = 0;
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
-@@ -3367,26 +3379,42 @@
-
- static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- enum addr_type_t type = UNICAST_ADDR;
-+
-+ if (net != &init_net)
-+ return 0;
-+
- return inet6_dump_addr(skb, cb, type);
- }
-
- static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- enum addr_type_t type = MULTICAST_ADDR;
-+
-+ if (net != &init_net)
-+ return 0;
-+
- return inet6_dump_addr(skb, cb, type);
- }
-
-
- static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- enum addr_type_t type = ANYCAST_ADDR;
-+
-+ if (net != &init_net)
-+ return 0;
-+
- return inet6_dump_addr(skb, cb, type);
- }
-
- static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
- void *arg)
- {
-+ struct net *net = in_skb->sk->sk_net;
- struct ifaddrmsg *ifm;
- struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *addr = NULL;
-@@ -3395,6 +3423,9 @@
- struct sk_buff *skb;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;
-+
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
- if (err < 0)
- goto errout;
-@@ -3407,7 +3438,7 @@
-
- ifm = nlmsg_data(nlh);
- if (ifm->ifa_index)
-- dev = __dev_get_by_index(ifm->ifa_index);
-+ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
-
- if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
- err = -EADDRNOTAVAIL;
-@@ -3427,7 +3458,7 @@
- kfree_skb(skb);
- goto errout_ifa;
- }
-- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-+ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
- errout_ifa:
- in6_ifa_put(ifa);
- errout:
-@@ -3450,10 +3481,10 @@
- kfree_skb(skb);
- goto errout;
- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
-+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
- errout:
- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
-+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
- }
-
- static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
-@@ -3612,19 +3643,22 @@
-
- static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- int idx, err;
- int s_idx = cb->args[0];
- struct net_device *dev;
- struct inet6_dev *idev;
- struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
-
-+ if (net != &init_net)
-+ return 0;
- /* FIXME: maybe disable ipv6 on non v6 guests?
- if (skb->sk && skb->sk->sk_vx_info)
- return skb->len; */
-
- read_lock(&dev_base_lock);
- idx = 0;
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- if (idx < s_idx)
- goto cont;
- if (!v6_dev_in_nx_info(dev, nxi))
-@@ -3661,10 +3695,10 @@
- kfree_skb(skb);
- goto errout;
- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
-+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
- errout:
- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
-+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
- }
-
- static inline size_t inet6_prefix_nlmsg_size(void)
-@@ -3730,10 +3764,10 @@
- kfree_skb(skb);
- goto errout;
- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
-+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
- errout:
- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
-+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
- }
-
- static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
-@@ -4244,16 +4278,16 @@
- * device and it being up should be removed.
- */
- rtnl_lock();
-- if (!ipv6_add_dev(&loopback_dev))
-+ if (!ipv6_add_dev(&init_net.loopback_dev))
- err = -ENOMEM;
- rtnl_unlock();
- if (err)
- return err;
-
-- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
-+ ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
- #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev);
-- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev);
-+ ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
-+ ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
- #endif
-
- register_netdevice_notifier(&ipv6_dev_notf);
-@@ -4304,12 +4338,12 @@
- * clean dev list.
- */
-
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- if ((idev = __in6_dev_get(dev)) == NULL)
- continue;
- addrconf_ifdown(dev, 1);
- }
-- addrconf_ifdown(&loopback_dev, 2);
-+ addrconf_ifdown(&init_net.loopback_dev, 2);
-
- /*
- * Check hash table.
-@@ -4335,6 +4369,6 @@
- rtnl_unlock();
-
- #ifdef CONFIG_PROC_FS
-- proc_net_remove("if_inet6");
-+ proc_net_remove(&init_net, "if_inet6");
- #endif
- }
-diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c.orig linux-2.6.22-590/net/ipv6/addrconf.c.orig
---- linux-2.6.22-570/net/ipv6/addrconf.c.orig 2008-03-20 13:25:40.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/addrconf.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,4301 +0,0 @@
--/*
-- * IPv6 Address [auto]configuration
-- * Linux INET6 implementation
-- *
-- * Authors:
-- * Pedro Roque <roque@di.fc.ul.pt>
-- * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
-- *
-- * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--/*
-- * Changes:
-- *
-- * Janos Farkas : delete timer on ifdown
-- * <chexum@bankinf.banki.hu>
-- * Andi Kleen : kill double kfree on module
-- * unload.
-- * Maciej W. Rozycki : FDDI support
-- * sekiya@USAGI : Don't send too many RS
-- * packets.
-- * yoshfuji@USAGI : Fixed interval between DAD
-- * packets.
-- * YOSHIFUJI Hideaki @USAGI : improved accuracy of
-- * address validation timer.
-- * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041)
-- * support.
-- * Yuji SEKIYA @USAGI : Don't assign a same IPv6
-- * address on a same interface.
-- * YOSHIFUJI Hideaki @USAGI : ARCnet support
-- * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to
-- * seq_file.
-- * YOSHIFUJI Hideaki @USAGI : improved source address
-- * selection; consider scope,
-- * status etc.
-- */
--
--#include <linux/errno.h>
--#include <linux/types.h>
--#include <linux/socket.h>
--#include <linux/sockios.h>
--#include <linux/net.h>
--#include <linux/in6.h>
--#include <linux/netdevice.h>
--#include <linux/if_addr.h>
--#include <linux/if_arp.h>
--#include <linux/if_arcnet.h>
--#include <linux/if_infiniband.h>
--#include <linux/route.h>
--#include <linux/inetdevice.h>
--#include <linux/init.h>
--#ifdef CONFIG_SYSCTL
--#include <linux/sysctl.h>
--#endif
--#include <linux/capability.h>
--#include <linux/delay.h>
--#include <linux/notifier.h>
--#include <linux/string.h>
--
--#include <net/sock.h>
--#include <net/snmp.h>
--
--#include <net/ipv6.h>
--#include <net/protocol.h>
--#include <net/ndisc.h>
--#include <net/ip6_route.h>
--#include <net/addrconf.h>
--#include <net/tcp.h>
--#include <net/ip.h>
--#include <net/netlink.h>
--#include <net/pkt_sched.h>
--#include <linux/if_tunnel.h>
--#include <linux/rtnetlink.h>
--
--#ifdef CONFIG_IPV6_PRIVACY
--#include <linux/random.h>
--#endif
--
--#include <asm/uaccess.h>
--#include <asm/unaligned.h>
--
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--
--/* Set to 3 to get tracing... */
--#define ACONF_DEBUG 2
--
--#if ACONF_DEBUG >= 3
--#define ADBG(x) printk x
--#else
--#define ADBG(x)
--#endif
--
--#define INFINITY_LIFE_TIME 0xFFFFFFFF
--#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
--
--#ifdef CONFIG_SYSCTL
--static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
--static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
--#endif
--
--#ifdef CONFIG_IPV6_PRIVACY
--static int __ipv6_regen_rndid(struct inet6_dev *idev);
--static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
--static void ipv6_regen_rndid(unsigned long data);
--
--static int desync_factor = MAX_DESYNC_FACTOR * HZ;
--#endif
--
--static int ipv6_count_addresses(struct inet6_dev *idev);
--
--/*
-- * Configured unicast address hash table
-- */
--static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
--static DEFINE_RWLOCK(addrconf_hash_lock);
--
--static void addrconf_verify(unsigned long);
--
--static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
--static DEFINE_SPINLOCK(addrconf_verify_lock);
--
--static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
--static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
--
--static int addrconf_ifdown(struct net_device *dev, int how);
--
--static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
--static void addrconf_dad_timer(unsigned long data);
--static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
--static void addrconf_dad_run(struct inet6_dev *idev);
--static void addrconf_rs_timer(unsigned long data);
--static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
--static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
--
--static void inet6_prefix_notify(int event, struct inet6_dev *idev,
-- struct prefix_info *pinfo);
--static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
--
--static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
--
--struct ipv6_devconf ipv6_devconf __read_mostly = {
-- .forwarding = 0,
-- .hop_limit = IPV6_DEFAULT_HOPLIMIT,
-- .mtu6 = IPV6_MIN_MTU,
-- .accept_ra = 1,
-- .accept_redirects = 1,
-- .autoconf = 1,
-- .force_mld_version = 0,
-- .dad_transmits = 1,
-- .rtr_solicits = MAX_RTR_SOLICITATIONS,
-- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
-- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
--#ifdef CONFIG_IPV6_PRIVACY
-- .use_tempaddr = 0,
-- .temp_valid_lft = TEMP_VALID_LIFETIME,
-- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
-- .regen_max_retry = REGEN_MAX_RETRY,
-- .max_desync_factor = MAX_DESYNC_FACTOR,
--#endif
-- .max_addresses = IPV6_MAX_ADDRESSES,
-- .accept_ra_defrtr = 1,
-- .accept_ra_pinfo = 1,
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- .accept_ra_rtr_pref = 1,
-- .rtr_probe_interval = 60 * HZ,
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- .accept_ra_rt_info_max_plen = 0,
--#endif
--#endif
-- .proxy_ndp = 0,
-- .accept_source_route = 0, /* we do not accept RH0 by default. */
--};
--
--static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
-- .forwarding = 0,
-- .hop_limit = IPV6_DEFAULT_HOPLIMIT,
-- .mtu6 = IPV6_MIN_MTU,
-- .accept_ra = 1,
-- .accept_redirects = 1,
-- .autoconf = 1,
-- .dad_transmits = 1,
-- .rtr_solicits = MAX_RTR_SOLICITATIONS,
-- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
-- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
--#ifdef CONFIG_IPV6_PRIVACY
-- .use_tempaddr = 0,
-- .temp_valid_lft = TEMP_VALID_LIFETIME,
-- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
-- .regen_max_retry = REGEN_MAX_RETRY,
-- .max_desync_factor = MAX_DESYNC_FACTOR,
--#endif
-- .max_addresses = IPV6_MAX_ADDRESSES,
-- .accept_ra_defrtr = 1,
-- .accept_ra_pinfo = 1,
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- .accept_ra_rtr_pref = 1,
-- .rtr_probe_interval = 60 * HZ,
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- .accept_ra_rt_info_max_plen = 0,
--#endif
--#endif
-- .proxy_ndp = 0,
-- .accept_source_route = 0, /* we do not accept RH0 by default. */
--};
--
--/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
--const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
--const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
--
--/* Check if a valid qdisc is available */
--static inline int addrconf_qdisc_ok(struct net_device *dev)
--{
-- return (dev->qdisc != &noop_qdisc);
--}
--
--static void addrconf_del_timer(struct inet6_ifaddr *ifp)
--{
-- if (del_timer(&ifp->timer))
-- __in6_ifa_put(ifp);
--}
--
--enum addrconf_timer_t
--{
-- AC_NONE,
-- AC_DAD,
-- AC_RS,
--};
--
--static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
-- enum addrconf_timer_t what,
-- unsigned long when)
--{
-- if (!del_timer(&ifp->timer))
-- in6_ifa_hold(ifp);
--
-- switch (what) {
-- case AC_DAD:
-- ifp->timer.function = addrconf_dad_timer;
-- break;
-- case AC_RS:
-- ifp->timer.function = addrconf_rs_timer;
-- break;
-- default:;
-- }
-- ifp->timer.expires = jiffies + when;
-- add_timer(&ifp->timer);
--}
--
--static int snmp6_alloc_dev(struct inet6_dev *idev)
--{
-- int err = -ENOMEM;
--
-- if (!idev || !idev->dev)
-- return -EINVAL;
--
-- if (snmp_mib_init((void **)idev->stats.ipv6,
-- sizeof(struct ipstats_mib),
-- __alignof__(struct ipstats_mib)) < 0)
-- goto err_ip;
-- if (snmp_mib_init((void **)idev->stats.icmpv6,
-- sizeof(struct icmpv6_mib),
-- __alignof__(struct icmpv6_mib)) < 0)
-- goto err_icmp;
--
-- return 0;
--
--err_icmp:
-- snmp_mib_free((void **)idev->stats.ipv6);
--err_ip:
-- return err;
--}
--
--static int snmp6_free_dev(struct inet6_dev *idev)
--{
-- snmp_mib_free((void **)idev->stats.icmpv6);
-- snmp_mib_free((void **)idev->stats.ipv6);
-- return 0;
--}
--
--/* Nobody refers to this device, we may destroy it. */
--
--static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
--{
-- struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
-- kfree(idev);
--}
--
--void in6_dev_finish_destroy(struct inet6_dev *idev)
--{
-- struct net_device *dev = idev->dev;
-- BUG_TRAP(idev->addr_list==NULL);
-- BUG_TRAP(idev->mc_list==NULL);
--#ifdef NET_REFCNT_DEBUG
-- printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
--#endif
-- dev_put(dev);
-- if (!idev->dead) {
-- printk("Freeing alive inet6 device %p\n", idev);
-- return;
-- }
-- snmp6_free_dev(idev);
-- call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
--}
--
--EXPORT_SYMBOL(in6_dev_finish_destroy);
--
--static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
--{
-- struct inet6_dev *ndev;
-- struct in6_addr maddr;
--
-- ASSERT_RTNL();
--
-- if (dev->mtu < IPV6_MIN_MTU)
-- return NULL;
--
-- ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
--
-- if (ndev == NULL)
-- return NULL;
--
-- rwlock_init(&ndev->lock);
-- ndev->dev = dev;
-- memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
-- ndev->cnf.mtu6 = dev->mtu;
-- ndev->cnf.sysctl = NULL;
-- ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
-- if (ndev->nd_parms == NULL) {
-- kfree(ndev);
-- return NULL;
-- }
-- /* We refer to the device */
-- dev_hold(dev);
--
-- if (snmp6_alloc_dev(ndev) < 0) {
-- ADBG((KERN_WARNING
-- "%s(): cannot allocate memory for statistics; dev=%s.\n",
-- __FUNCTION__, dev->name));
-- neigh_parms_release(&nd_tbl, ndev->nd_parms);
-- ndev->dead = 1;
-- in6_dev_finish_destroy(ndev);
-- return NULL;
-- }
--
-- if (snmp6_register_dev(ndev) < 0) {
-- ADBG((KERN_WARNING
-- "%s(): cannot create /proc/net/dev_snmp6/%s\n",
-- __FUNCTION__, dev->name));
-- neigh_parms_release(&nd_tbl, ndev->nd_parms);
-- ndev->dead = 1;
-- in6_dev_finish_destroy(ndev);
-- return NULL;
-- }
--
-- /* One reference from device. We must do this before
-- * we invoke __ipv6_regen_rndid().
-- */
-- in6_dev_hold(ndev);
--
--#ifdef CONFIG_IPV6_PRIVACY
-- init_timer(&ndev->regen_timer);
-- ndev->regen_timer.function = ipv6_regen_rndid;
-- ndev->regen_timer.data = (unsigned long) ndev;
-- if ((dev->flags&IFF_LOOPBACK) ||
-- dev->type == ARPHRD_TUNNEL ||
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- dev->type == ARPHRD_SIT ||
--#endif
-- dev->type == ARPHRD_NONE) {
-- printk(KERN_INFO
-- "%s: Disabled Privacy Extensions\n",
-- dev->name);
-- ndev->cnf.use_tempaddr = -1;
-- } else {
-- in6_dev_hold(ndev);
-- ipv6_regen_rndid((unsigned long) ndev);
-- }
--#endif
--
-- if (netif_running(dev) && addrconf_qdisc_ok(dev))
-- ndev->if_flags |= IF_READY;
--
-- ipv6_mc_init_dev(ndev);
-- ndev->tstamp = jiffies;
--#ifdef CONFIG_SYSCTL
-- neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
-- NET_IPV6_NEIGH, "ipv6",
-- &ndisc_ifinfo_sysctl_change,
-- NULL);
-- addrconf_sysctl_register(ndev, &ndev->cnf);
--#endif
-- /* protected by rtnl_lock */
-- rcu_assign_pointer(dev->ip6_ptr, ndev);
--
-- /* Join all-node multicast group */
-- ipv6_addr_all_nodes(&maddr);
-- ipv6_dev_mc_inc(dev, &maddr);
--
-- return ndev;
--}
--
--static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- if ((idev = __in6_dev_get(dev)) == NULL) {
-- if ((idev = ipv6_add_dev(dev)) == NULL)
-- return NULL;
-- }
--
-- if (dev->flags&IFF_UP)
-- ipv6_mc_up(idev);
-- return idev;
--}
--
--#ifdef CONFIG_SYSCTL
--static void dev_forward_change(struct inet6_dev *idev)
--{
-- struct net_device *dev;
-- struct inet6_ifaddr *ifa;
-- struct in6_addr addr;
--
-- if (!idev)
-- return;
-- dev = idev->dev;
-- if (dev && (dev->flags & IFF_MULTICAST)) {
-- ipv6_addr_all_routers(&addr);
--
-- if (idev->cnf.forwarding)
-- ipv6_dev_mc_inc(dev, &addr);
-- else
-- ipv6_dev_mc_dec(dev, &addr);
-- }
-- for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
-- if (ifa->flags&IFA_F_TENTATIVE)
-- continue;
-- if (idev->cnf.forwarding)
-- addrconf_join_anycast(ifa);
-- else
-- addrconf_leave_anycast(ifa);
-- }
--}
--
--
--static void addrconf_forward_change(void)
--{
-- struct net_device *dev;
-- struct inet6_dev *idev;
--
-- read_lock(&dev_base_lock);
-- for_each_netdev(dev) {
-- rcu_read_lock();
-- idev = __in6_dev_get(dev);
-- if (idev) {
-- int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
-- idev->cnf.forwarding = ipv6_devconf.forwarding;
-- if (changed)
-- dev_forward_change(idev);
-- }
-- rcu_read_unlock();
-- }
-- read_unlock(&dev_base_lock);
--}
--#endif
--
--/* Nobody refers to this ifaddr, destroy it */
--
--void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
--{
-- BUG_TRAP(ifp->if_next==NULL);
-- BUG_TRAP(ifp->lst_next==NULL);
--#ifdef NET_REFCNT_DEBUG
-- printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
--#endif
--
-- in6_dev_put(ifp->idev);
--
-- if (del_timer(&ifp->timer))
-- printk("Timer is still running, when freeing ifa=%p\n", ifp);
--
-- if (!ifp->dead) {
-- printk("Freeing alive inet6 address %p\n", ifp);
-- return;
-- }
-- dst_release(&ifp->rt->u.dst);
--
-- kfree(ifp);
--}
--
--static void
--ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
--{
-- struct inet6_ifaddr *ifa, **ifap;
-- int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
--
-- /*
-- * Each device address list is sorted in order of scope -
-- * global before linklocal.
-- */
-- for (ifap = &idev->addr_list; (ifa = *ifap) != NULL;
-- ifap = &ifa->if_next) {
-- if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
-- break;
-- }
--
-- ifp->if_next = *ifap;
-- *ifap = ifp;
--}
--
--/* On success it returns ifp with increased reference count */
--
--static struct inet6_ifaddr *
--ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
-- int scope, u32 flags)
--{
-- struct inet6_ifaddr *ifa = NULL;
-- struct rt6_info *rt;
-- int hash;
-- int err = 0;
--
-- rcu_read_lock_bh();
-- if (idev->dead) {
-- err = -ENODEV; /*XXX*/
-- goto out2;
-- }
--
-- write_lock(&addrconf_hash_lock);
--
-- /* Ignore adding duplicate addresses on an interface */
-- if (ipv6_chk_same_addr(addr, idev->dev)) {
-- ADBG(("ipv6_add_addr: already assigned\n"));
-- err = -EEXIST;
-- goto out;
-- }
--
-- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
--
-- if (ifa == NULL) {
-- ADBG(("ipv6_add_addr: malloc failed\n"));
-- err = -ENOBUFS;
-- goto out;
-- }
--
-- rt = addrconf_dst_alloc(idev, addr, 0);
-- if (IS_ERR(rt)) {
-- err = PTR_ERR(rt);
-- goto out;
-- }
--
-- ipv6_addr_copy(&ifa->addr, addr);
--
-- spin_lock_init(&ifa->lock);
-- init_timer(&ifa->timer);
-- ifa->timer.data = (unsigned long) ifa;
-- ifa->scope = scope;
-- ifa->prefix_len = pfxlen;
-- ifa->flags = flags | IFA_F_TENTATIVE;
-- ifa->cstamp = ifa->tstamp = jiffies;
--
-- ifa->rt = rt;
--
-- /*
-- * part one of RFC 4429, section 3.3
-- * We should not configure an address as
-- * optimistic if we do not yet know the link
-- * layer address of our nexhop router
-- */
--
-- if (rt->rt6i_nexthop == NULL)
-- ifa->flags &= ~IFA_F_OPTIMISTIC;
--
-- ifa->idev = idev;
-- in6_dev_hold(idev);
-- /* For caller */
-- in6_ifa_hold(ifa);
--
-- /* Add to big hash table */
-- hash = ipv6_addr_hash(addr);
--
-- ifa->lst_next = inet6_addr_lst[hash];
-- inet6_addr_lst[hash] = ifa;
-- in6_ifa_hold(ifa);
-- write_unlock(&addrconf_hash_lock);
--
-- write_lock(&idev->lock);
-- /* Add to inet6_dev unicast addr list. */
-- ipv6_link_dev_addr(idev, ifa);
--
--#ifdef CONFIG_IPV6_PRIVACY
-- if (ifa->flags&IFA_F_TEMPORARY) {
-- ifa->tmp_next = idev->tempaddr_list;
-- idev->tempaddr_list = ifa;
-- in6_ifa_hold(ifa);
-- }
--#endif
--
-- in6_ifa_hold(ifa);
-- write_unlock(&idev->lock);
--out2:
-- rcu_read_unlock_bh();
--
-- if (likely(err == 0))
-- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
-- else {
-- kfree(ifa);
-- ifa = ERR_PTR(err);
-- }
--
-- return ifa;
--out:
-- write_unlock(&addrconf_hash_lock);
-- goto out2;
--}
--
--/* This function wants to get referenced ifp and releases it before return */
--
--static void ipv6_del_addr(struct inet6_ifaddr *ifp)
--{
-- struct inet6_ifaddr *ifa, **ifap;
-- struct inet6_dev *idev = ifp->idev;
-- int hash;
-- int deleted = 0, onlink = 0;
-- unsigned long expires = jiffies;
--
-- hash = ipv6_addr_hash(&ifp->addr);
--
-- ifp->dead = 1;
--
-- write_lock_bh(&addrconf_hash_lock);
-- for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
-- ifap = &ifa->lst_next) {
-- if (ifa == ifp) {
-- *ifap = ifa->lst_next;
-- __in6_ifa_put(ifp);
-- ifa->lst_next = NULL;
-- break;
-- }
-- }
-- write_unlock_bh(&addrconf_hash_lock);
--
-- write_lock_bh(&idev->lock);
--#ifdef CONFIG_IPV6_PRIVACY
-- if (ifp->flags&IFA_F_TEMPORARY) {
-- for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
-- ifap = &ifa->tmp_next) {
-- if (ifa == ifp) {
-- *ifap = ifa->tmp_next;
-- if (ifp->ifpub) {
-- in6_ifa_put(ifp->ifpub);
-- ifp->ifpub = NULL;
-- }
-- __in6_ifa_put(ifp);
-- ifa->tmp_next = NULL;
-- break;
-- }
-- }
-- }
--#endif
--
-- for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
-- if (ifa == ifp) {
-- *ifap = ifa->if_next;
-- __in6_ifa_put(ifp);
-- ifa->if_next = NULL;
-- if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
-- break;
-- deleted = 1;
-- continue;
-- } else if (ifp->flags & IFA_F_PERMANENT) {
-- if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
-- ifp->prefix_len)) {
-- if (ifa->flags & IFA_F_PERMANENT) {
-- onlink = 1;
-- if (deleted)
-- break;
-- } else {
-- unsigned long lifetime;
--
-- if (!onlink)
-- onlink = -1;
--
-- spin_lock(&ifa->lock);
-- lifetime = min_t(unsigned long,
-- ifa->valid_lft, 0x7fffffffUL/HZ);
-- if (time_before(expires,
-- ifa->tstamp + lifetime * HZ))
-- expires = ifa->tstamp + lifetime * HZ;
-- spin_unlock(&ifa->lock);
-- }
-- }
-- }
-- ifap = &ifa->if_next;
-- }
-- write_unlock_bh(&idev->lock);
--
-- ipv6_ifa_notify(RTM_DELADDR, ifp);
--
-- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
--
-- addrconf_del_timer(ifp);
--
-- /*
-- * Purge or update corresponding prefix
-- *
-- * 1) we don't purge prefix here if address was not permanent.
-- * prefix is managed by its own lifetime.
-- * 2) if there're no addresses, delete prefix.
-- * 3) if there're still other permanent address(es),
-- * corresponding prefix is still permanent.
-- * 4) otherwise, update prefix lifetime to the
-- * longest valid lifetime among the corresponding
-- * addresses on the device.
-- * Note: subsequent RA will update lifetime.
-- *
-- * --yoshfuji
-- */
-- if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
-- struct in6_addr prefix;
-- struct rt6_info *rt;
--
-- ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
-- rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
--
-- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-- if (onlink == 0) {
-- ip6_del_rt(rt);
-- rt = NULL;
-- } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
-- rt->rt6i_expires = expires;
-- rt->rt6i_flags |= RTF_EXPIRES;
-- }
-- }
-- dst_release(&rt->u.dst);
-- }
--
-- in6_ifa_put(ifp);
--}
--
--#ifdef CONFIG_IPV6_PRIVACY
--static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
--{
-- struct inet6_dev *idev = ifp->idev;
-- struct in6_addr addr, *tmpaddr;
-- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
-- int tmp_plen;
-- int ret = 0;
-- int max_addresses;
-- u32 addr_flags;
--
-- write_lock(&idev->lock);
-- if (ift) {
-- spin_lock_bh(&ift->lock);
-- memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
-- spin_unlock_bh(&ift->lock);
-- tmpaddr = &addr;
-- } else {
-- tmpaddr = NULL;
-- }
--retry:
-- in6_dev_hold(idev);
-- if (idev->cnf.use_tempaddr <= 0) {
-- write_unlock(&idev->lock);
-- printk(KERN_INFO
-- "ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
-- in6_dev_put(idev);
-- ret = -1;
-- goto out;
-- }
-- spin_lock_bh(&ifp->lock);
-- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
-- idev->cnf.use_tempaddr = -1; /*XXX*/
-- spin_unlock_bh(&ifp->lock);
-- write_unlock(&idev->lock);
-- printk(KERN_WARNING
-- "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
-- in6_dev_put(idev);
-- ret = -1;
-- goto out;
-- }
-- in6_ifa_hold(ifp);
-- memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
-- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
-- spin_unlock_bh(&ifp->lock);
-- write_unlock(&idev->lock);
-- printk(KERN_WARNING
-- "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
-- in6_ifa_put(ifp);
-- in6_dev_put(idev);
-- ret = -1;
-- goto out;
-- }
-- memcpy(&addr.s6_addr[8], idev->rndid, 8);
-- tmp_valid_lft = min_t(__u32,
-- ifp->valid_lft,
-- idev->cnf.temp_valid_lft);
-- tmp_prefered_lft = min_t(__u32,
-- ifp->prefered_lft,
-- idev->cnf.temp_prefered_lft - desync_factor / HZ);
-- tmp_plen = ifp->prefix_len;
-- max_addresses = idev->cnf.max_addresses;
-- tmp_cstamp = ifp->cstamp;
-- tmp_tstamp = ifp->tstamp;
-- spin_unlock_bh(&ifp->lock);
--
-- write_unlock(&idev->lock);
--
-- addr_flags = IFA_F_TEMPORARY;
-- /* set in addrconf_prefix_rcv() */
-- if (ifp->flags & IFA_F_OPTIMISTIC)
-- addr_flags |= IFA_F_OPTIMISTIC;
--
-- ift = !max_addresses ||
-- ipv6_count_addresses(idev) < max_addresses ?
-- ipv6_add_addr(idev, &addr, tmp_plen,
-- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
-- addr_flags) : NULL;
-- if (!ift || IS_ERR(ift)) {
-- in6_ifa_put(ifp);
-- in6_dev_put(idev);
-- printk(KERN_INFO
-- "ipv6_create_tempaddr(): retry temporary address regeneration.\n");
-- tmpaddr = &addr;
-- write_lock(&idev->lock);
-- goto retry;
-- }
--
-- spin_lock_bh(&ift->lock);
-- ift->ifpub = ifp;
-- ift->valid_lft = tmp_valid_lft;
-- ift->prefered_lft = tmp_prefered_lft;
-- ift->cstamp = tmp_cstamp;
-- ift->tstamp = tmp_tstamp;
-- spin_unlock_bh(&ift->lock);
--
-- addrconf_dad_start(ift, 0);
-- in6_ifa_put(ift);
-- in6_dev_put(idev);
--out:
-- return ret;
--}
--#endif
--
--/*
-- * Choose an appropriate source address (RFC3484)
-- */
--struct ipv6_saddr_score {
-- int addr_type;
-- unsigned int attrs;
-- int matchlen;
-- int scope;
-- unsigned int rule;
--};
--
--#define IPV6_SADDR_SCORE_LOCAL 0x0001
--#define IPV6_SADDR_SCORE_PREFERRED 0x0004
--#define IPV6_SADDR_SCORE_HOA 0x0008
--#define IPV6_SADDR_SCORE_OIF 0x0010
--#define IPV6_SADDR_SCORE_LABEL 0x0020
--#define IPV6_SADDR_SCORE_PRIVACY 0x0040
--
--static inline int ipv6_saddr_preferred(int type)
--{
-- if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|
-- IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED))
-- return 1;
-- return 0;
--}
--
--/* static matching label */
--static inline int ipv6_saddr_label(const struct in6_addr *addr, int type)
--{
-- /*
-- * prefix (longest match) label
-- * -----------------------------
-- * ::1/128 0
-- * ::/0 1
-- * 2002::/16 2
-- * ::/96 3
-- * ::ffff:0:0/96 4
-- * fc00::/7 5
-- * 2001::/32 6
-- */
-- if (type & IPV6_ADDR_LOOPBACK)
-- return 0;
-- else if (type & IPV6_ADDR_COMPATv4)
-- return 3;
-- else if (type & IPV6_ADDR_MAPPED)
-- return 4;
-- else if (addr->s6_addr32[0] == htonl(0x20010000))
-- return 6;
-- else if (addr->s6_addr16[0] == htons(0x2002))
-- return 2;
-- else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
-- return 5;
-- return 1;
--}
--
--int ipv6_dev_get_saddr(struct net_device *daddr_dev,
-- struct in6_addr *daddr, struct in6_addr *saddr)
--{
-- struct ipv6_saddr_score hiscore;
-- struct inet6_ifaddr *ifa_result = NULL;
-- int daddr_type = __ipv6_addr_type(daddr);
-- int daddr_scope = __ipv6_addr_src_scope(daddr_type);
-- u32 daddr_label = ipv6_saddr_label(daddr, daddr_type);
-- struct net_device *dev;
--
-- memset(&hiscore, 0, sizeof(hiscore));
--
-- read_lock(&dev_base_lock);
-- rcu_read_lock();
--
-- for_each_netdev(dev) {
-- struct inet6_dev *idev;
-- struct inet6_ifaddr *ifa;
--
-- /* Rule 0: Candidate Source Address (section 4)
-- * - multicast and link-local destination address,
-- * the set of candidate source address MUST only
-- * include addresses assigned to interfaces
-- * belonging to the same link as the outgoing
-- * interface.
-- * (- For site-local destination addresses, the
-- * set of candidate source addresses MUST only
-- * include addresses assigned to interfaces
-- * belonging to the same site as the outgoing
-- * interface.)
-- */
-- if ((daddr_type & IPV6_ADDR_MULTICAST ||
-- daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
-- daddr_dev && dev != daddr_dev)
-- continue;
--
-- idev = __in6_dev_get(dev);
-- if (!idev)
-- continue;
--
-- read_lock_bh(&idev->lock);
-- for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
-- struct ipv6_saddr_score score;
--
-- score.addr_type = __ipv6_addr_type(&ifa->addr);
--
-- /* Rule 0:
-- * - Tentative Address (RFC2462 section 5.4)
-- * - A tentative address is not considered
-- * "assigned to an interface" in the traditional
-- * sense, unless it is also flagged as optimistic.
-- * - Candidate Source Address (section 4)
-- * - In any case, anycast addresses, multicast
-- * addresses, and the unspecified address MUST
-- * NOT be included in a candidate set.
-- */
-- if ((ifa->flags & IFA_F_TENTATIVE) &&
-- (!(ifa->flags & IFA_F_OPTIMISTIC)))
-- continue;
-- if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
-- score.addr_type & IPV6_ADDR_MULTICAST)) {
-- LIMIT_NETDEBUG(KERN_DEBUG
-- "ADDRCONF: unspecified / multicast address"
-- "assigned as unicast address on %s",
-- dev->name);
-- continue;
-- }
--
-- score.attrs = 0;
-- score.matchlen = 0;
-- score.scope = 0;
-- score.rule = 0;
--
-- if (ifa_result == NULL) {
-- /* record it if the first available entry */
-- goto record_it;
-- }
--
-- /* Rule 1: Prefer same address */
-- if (hiscore.rule < 1) {
-- if (ipv6_addr_equal(&ifa_result->addr, daddr))
-- hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL;
-- hiscore.rule++;
-- }
-- if (ipv6_addr_equal(&ifa->addr, daddr)) {
-- score.attrs |= IPV6_SADDR_SCORE_LOCAL;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) {
-- score.rule = 1;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)
-- continue;
-- }
--
-- /* Rule 2: Prefer appropriate scope */
-- if (hiscore.rule < 2) {
-- hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type);
-- hiscore.rule++;
-- }
-- score.scope = __ipv6_addr_src_scope(score.addr_type);
-- if (hiscore.scope < score.scope) {
-- if (hiscore.scope < daddr_scope) {
-- score.rule = 2;
-- goto record_it;
-- } else
-- continue;
-- } else if (score.scope < hiscore.scope) {
-- if (score.scope < daddr_scope)
-- break; /* addresses sorted by scope */
-- else {
-- score.rule = 2;
-- goto record_it;
-- }
-- }
--
-- /* Rule 3: Avoid deprecated and optimistic addresses */
-- if (hiscore.rule < 3) {
-- if (ipv6_saddr_preferred(hiscore.addr_type) ||
-- (((ifa_result->flags &
-- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
-- hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
-- hiscore.rule++;
-- }
-- if (ipv6_saddr_preferred(score.addr_type) ||
-- (((ifa->flags &
-- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
-- score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
-- score.rule = 3;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)
-- continue;
-- }
--
-- /* Rule 4: Prefer home address */
--#ifdef CONFIG_IPV6_MIP6
-- if (hiscore.rule < 4) {
-- if (ifa_result->flags & IFA_F_HOMEADDRESS)
-- hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
-- hiscore.rule++;
-- }
-- if (ifa->flags & IFA_F_HOMEADDRESS) {
-- score.attrs |= IPV6_SADDR_SCORE_HOA;
-- if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
-- score.rule = 4;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
-- continue;
-- }
--#else
-- if (hiscore.rule < 4)
-- hiscore.rule++;
--#endif
--
-- /* Rule 5: Prefer outgoing interface */
-- if (hiscore.rule < 5) {
-- if (daddr_dev == NULL ||
-- daddr_dev == ifa_result->idev->dev)
-- hiscore.attrs |= IPV6_SADDR_SCORE_OIF;
-- hiscore.rule++;
-- }
-- if (daddr_dev == NULL ||
-- daddr_dev == ifa->idev->dev) {
-- score.attrs |= IPV6_SADDR_SCORE_OIF;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) {
-- score.rule = 5;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_OIF)
-- continue;
-- }
--
-- /* Rule 6: Prefer matching label */
-- if (hiscore.rule < 6) {
-- if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label)
-- hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
-- hiscore.rule++;
-- }
-- if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) {
-- score.attrs |= IPV6_SADDR_SCORE_LABEL;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
-- score.rule = 6;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL)
-- continue;
-- }
--
--#ifdef CONFIG_IPV6_PRIVACY
-- /* Rule 7: Prefer public address
-- * Note: prefer temprary address if use_tempaddr >= 2
-- */
-- if (hiscore.rule < 7) {
-- if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^
-- (ifa_result->idev->cnf.use_tempaddr >= 2))
-- hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY;
-- hiscore.rule++;
-- }
-- if ((!(ifa->flags & IFA_F_TEMPORARY)) ^
-- (ifa->idev->cnf.use_tempaddr >= 2)) {
-- score.attrs |= IPV6_SADDR_SCORE_PRIVACY;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) {
-- score.rule = 7;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)
-- continue;
-- }
--#else
-- if (hiscore.rule < 7)
-- hiscore.rule++;
--#endif
-- /* Rule 8: Use longest matching prefix */
-- if (hiscore.rule < 8) {
-- hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr);
-- hiscore.rule++;
-- }
-- score.matchlen = ipv6_addr_diff(&ifa->addr, daddr);
-- if (score.matchlen > hiscore.matchlen) {
-- score.rule = 8;
-- goto record_it;
-- }
--#if 0
-- else if (score.matchlen < hiscore.matchlen)
-- continue;
--#endif
--
-- /* Final Rule: choose first available one */
-- continue;
--record_it:
-- if (ifa_result)
-- in6_ifa_put(ifa_result);
-- in6_ifa_hold(ifa);
-- ifa_result = ifa;
-- hiscore = score;
-- }
-- read_unlock_bh(&idev->lock);
-- }
-- rcu_read_unlock();
-- read_unlock(&dev_base_lock);
--
-- if (!ifa_result)
-- return -EADDRNOTAVAIL;
--
-- ipv6_addr_copy(saddr, &ifa_result->addr);
-- in6_ifa_put(ifa_result);
-- return 0;
--}
--
--
--int ipv6_get_saddr(struct dst_entry *dst,
-- struct in6_addr *daddr, struct in6_addr *saddr)
--{
-- return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
--}
--
--EXPORT_SYMBOL(ipv6_get_saddr);
--
--int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
-- unsigned char banned_flags)
--{
-- struct inet6_dev *idev;
-- int err = -EADDRNOTAVAIL;
--
-- rcu_read_lock();
-- if ((idev = __in6_dev_get(dev)) != NULL) {
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-- if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
-- ipv6_addr_copy(addr, &ifp->addr);
-- err = 0;
-- break;
-- }
-- }
-- read_unlock_bh(&idev->lock);
-- }
-- rcu_read_unlock();
-- return err;
--}
--
--static int ipv6_count_addresses(struct inet6_dev *idev)
--{
-- int cnt = 0;
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
-- cnt++;
-- read_unlock_bh(&idev->lock);
-- return cnt;
--}
--
--int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
--{
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
--
-- read_lock_bh(&addrconf_hash_lock);
-- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-- if (ipv6_addr_equal(&ifp->addr, addr) &&
-- !(ifp->flags&IFA_F_TENTATIVE)) {
-- if (dev == NULL || ifp->idev->dev == dev ||
-- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
-- break;
-- }
-- }
-- read_unlock_bh(&addrconf_hash_lock);
-- return ifp != NULL;
--}
--
--EXPORT_SYMBOL(ipv6_chk_addr);
--
--static
--int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
--{
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
--
-- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-- if (ipv6_addr_equal(&ifp->addr, addr)) {
-- if (dev == NULL || ifp->idev->dev == dev)
-- break;
-- }
-- }
-- return ifp != NULL;
--}
--
--struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
--{
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
--
-- read_lock_bh(&addrconf_hash_lock);
-- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-- if (ipv6_addr_equal(&ifp->addr, addr)) {
-- if (dev == NULL || ifp->idev->dev == dev ||
-- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
-- in6_ifa_hold(ifp);
-- break;
-- }
-- }
-- }
-- read_unlock_bh(&addrconf_hash_lock);
--
-- return ifp;
--}
--
--int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
--{
-- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
-- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
-- __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
-- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
-- int sk_ipv6only = ipv6_only_sock(sk);
-- int sk2_ipv6only = inet_v6_ipv6only(sk2);
-- int addr_type = ipv6_addr_type(sk_rcv_saddr6);
-- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
--
-- if (!sk2_rcv_saddr && !sk_ipv6only)
-- return 1;
--
-- if (addr_type2 == IPV6_ADDR_ANY &&
-- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
-- return 1;
--
-- if (addr_type == IPV6_ADDR_ANY &&
-- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
-- return 1;
--
-- if (sk2_rcv_saddr6 &&
-- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
-- return 1;
--
-- if (addr_type == IPV6_ADDR_MAPPED &&
-- !sk2_ipv6only &&
-- (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr))
-- return 1;
--
-- return 0;
--}
--
--/* Gets referenced address, destroys ifaddr */
--
--static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
--{
-- if (ifp->flags&IFA_F_PERMANENT) {
-- spin_lock_bh(&ifp->lock);
-- addrconf_del_timer(ifp);
-- ifp->flags |= IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- in6_ifa_put(ifp);
--#ifdef CONFIG_IPV6_PRIVACY
-- } else if (ifp->flags&IFA_F_TEMPORARY) {
-- struct inet6_ifaddr *ifpub;
-- spin_lock_bh(&ifp->lock);
-- ifpub = ifp->ifpub;
-- if (ifpub) {
-- in6_ifa_hold(ifpub);
-- spin_unlock_bh(&ifp->lock);
-- ipv6_create_tempaddr(ifpub, ifp);
-- in6_ifa_put(ifpub);
-- } else {
-- spin_unlock_bh(&ifp->lock);
-- }
-- ipv6_del_addr(ifp);
--#endif
-- } else
-- ipv6_del_addr(ifp);
--}
--
--void addrconf_dad_failure(struct inet6_ifaddr *ifp)
--{
-- if (net_ratelimit())
-- printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
-- addrconf_dad_stop(ifp);
--}
--
--/* Join to solicited addr multicast group. */
--
--void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
--{
-- struct in6_addr maddr;
--
-- if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
-- return;
--
-- addrconf_addr_solict_mult(addr, &maddr);
-- ipv6_dev_mc_inc(dev, &maddr);
--}
--
--void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
--{
-- struct in6_addr maddr;
--
-- if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
-- return;
--
-- addrconf_addr_solict_mult(addr, &maddr);
-- __ipv6_dev_mc_dec(idev, &maddr);
--}
--
--static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
--{
-- struct in6_addr addr;
-- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
-- if (ipv6_addr_any(&addr))
-- return;
-- ipv6_dev_ac_inc(ifp->idev->dev, &addr);
--}
--
--static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
--{
-- struct in6_addr addr;
-- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
-- if (ipv6_addr_any(&addr))
-- return;
-- __ipv6_dev_ac_dec(ifp->idev, &addr);
--}
--
--static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
--{
-- if (dev->addr_len != ETH_ALEN)
-- return -1;
-- memcpy(eui, dev->dev_addr, 3);
-- memcpy(eui + 5, dev->dev_addr + 3, 3);
--
-- /*
-- * The zSeries OSA network cards can be shared among various
-- * OS instances, but the OSA cards have only one MAC address.
-- * This leads to duplicate address conflicts in conjunction
-- * with IPv6 if more than one instance uses the same card.
-- *
-- * The driver for these cards can deliver a unique 16-bit
-- * identifier for each instance sharing the same card. It is
-- * placed instead of 0xFFFE in the interface identifier. The
-- * "u" bit of the interface identifier is not inverted in this
-- * case. Hence the resulting interface identifier has local
-- * scope according to RFC2373.
-- */
-- if (dev->dev_id) {
-- eui[3] = (dev->dev_id >> 8) & 0xFF;
-- eui[4] = dev->dev_id & 0xFF;
-- } else {
-- eui[3] = 0xFF;
-- eui[4] = 0xFE;
-- eui[0] ^= 2;
-- }
-- return 0;
--}
--
--static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
--{
-- /* XXX: inherit EUI-64 from other interface -- yoshfuji */
-- if (dev->addr_len != ARCNET_ALEN)
-- return -1;
-- memset(eui, 0, 7);
-- eui[7] = *(u8*)dev->dev_addr;
-- return 0;
--}
--
--static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
--{
-- if (dev->addr_len != INFINIBAND_ALEN)
-- return -1;
-- memcpy(eui, dev->dev_addr + 12, 8);
-- eui[0] |= 2;
-- return 0;
--}
--
--static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
--{
-- switch (dev->type) {
-- case ARPHRD_ETHER:
-- case ARPHRD_FDDI:
-- case ARPHRD_IEEE802_TR:
-- return addrconf_ifid_eui48(eui, dev);
-- case ARPHRD_ARCNET:
-- return addrconf_ifid_arcnet(eui, dev);
-- case ARPHRD_INFINIBAND:
-- return addrconf_ifid_infiniband(eui, dev);
-- }
-- return -1;
--}
--
--static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
--{
-- int err = -1;
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-- if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
-- memcpy(eui, ifp->addr.s6_addr+8, 8);
-- err = 0;
-- break;
-- }
-- }
-- read_unlock_bh(&idev->lock);
-- return err;
--}
--
--#ifdef CONFIG_IPV6_PRIVACY
--/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
--static int __ipv6_regen_rndid(struct inet6_dev *idev)
--{
--regen:
-- get_random_bytes(idev->rndid, sizeof(idev->rndid));
-- idev->rndid[0] &= ~0x02;
--
-- /*
-- * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
-- * check if generated address is not inappropriate
-- *
-- * - Reserved subnet anycast (RFC 2526)
-- * 11111101 11....11 1xxxxxxx
-- * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
-- * 00-00-5E-FE-xx-xx-xx-xx
-- * - value 0
-- * - XXX: already assigned to an address on the device
-- */
-- if (idev->rndid[0] == 0xfd &&
-- (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
-- (idev->rndid[7]&0x80))
-- goto regen;
-- if ((idev->rndid[0]|idev->rndid[1]) == 0) {
-- if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
-- goto regen;
-- if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
-- goto regen;
-- }
--
-- return 0;
--}
--
--static void ipv6_regen_rndid(unsigned long data)
--{
-- struct inet6_dev *idev = (struct inet6_dev *) data;
-- unsigned long expires;
--
-- rcu_read_lock_bh();
-- write_lock_bh(&idev->lock);
--
-- if (idev->dead)
-- goto out;
--
-- if (__ipv6_regen_rndid(idev) < 0)
-- goto out;
--
-- expires = jiffies +
-- idev->cnf.temp_prefered_lft * HZ -
-- idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
-- if (time_before(expires, jiffies)) {
-- printk(KERN_WARNING
-- "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
-- idev->dev->name);
-- goto out;
-- }
--
-- if (!mod_timer(&idev->regen_timer, expires))
-- in6_dev_hold(idev);
--
--out:
-- write_unlock_bh(&idev->lock);
-- rcu_read_unlock_bh();
-- in6_dev_put(idev);
--}
--
--static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
-- int ret = 0;
--
-- if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
-- ret = __ipv6_regen_rndid(idev);
-- return ret;
--}
--#endif
--
--/*
-- * Add prefix route.
-- */
--
--static void
--addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
-- unsigned long expires, u32 flags)
--{
-- struct fib6_config cfg = {
-- .fc_table = RT6_TABLE_PREFIX,
-- .fc_metric = IP6_RT_PRIO_ADDRCONF,
-- .fc_ifindex = dev->ifindex,
-- .fc_expires = expires,
-- .fc_dst_len = plen,
-- .fc_flags = RTF_UP | flags,
-- };
--
-- ipv6_addr_copy(&cfg.fc_dst, pfx);
--
-- /* Prevent useless cloning on PtP SIT.
-- This thing is done here expecting that the whole
-- class of non-broadcast devices need not cloning.
-- */
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
-- cfg.fc_flags |= RTF_NONEXTHOP;
--#endif
--
-- ip6_route_add(&cfg);
--}
--
--/* Create "default" multicast route to the interface */
--
--static void addrconf_add_mroute(struct net_device *dev)
--{
-- struct fib6_config cfg = {
-- .fc_table = RT6_TABLE_LOCAL,
-- .fc_metric = IP6_RT_PRIO_ADDRCONF,
-- .fc_ifindex = dev->ifindex,
-- .fc_dst_len = 8,
-- .fc_flags = RTF_UP,
-- };
--
-- ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
--
-- ip6_route_add(&cfg);
--}
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
--static void sit_route_add(struct net_device *dev)
--{
-- struct fib6_config cfg = {
-- .fc_table = RT6_TABLE_MAIN,
-- .fc_metric = IP6_RT_PRIO_ADDRCONF,
-- .fc_ifindex = dev->ifindex,
-- .fc_dst_len = 96,
-- .fc_flags = RTF_UP | RTF_NONEXTHOP,
-- };
--
-- /* prefix length - 96 bits "::d.d.d.d" */
-- ip6_route_add(&cfg);
--}
--#endif
--
--static void addrconf_add_lroute(struct net_device *dev)
--{
-- struct in6_addr addr;
--
-- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
-- addrconf_prefix_route(&addr, 64, dev, 0, 0);
--}
--
--static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- if ((idev = ipv6_find_idev(dev)) == NULL)
-- return NULL;
--
-- /* Add default multicast route */
-- addrconf_add_mroute(dev);
--
-- /* Add link local route */
-- addrconf_add_lroute(dev);
-- return idev;
--}
--
--void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
--{
-- struct prefix_info *pinfo;
-- __u32 valid_lft;
-- __u32 prefered_lft;
-- int addr_type;
-- unsigned long rt_expires;
-- struct inet6_dev *in6_dev;
--
-- pinfo = (struct prefix_info *) opt;
--
-- if (len < sizeof(struct prefix_info)) {
-- ADBG(("addrconf: prefix option too short\n"));
-- return;
-- }
--
-- /*
-- * Validation checks ([ADDRCONF], page 19)
-- */
--
-- addr_type = ipv6_addr_type(&pinfo->prefix);
--
-- if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
-- return;
--
-- valid_lft = ntohl(pinfo->valid);
-- prefered_lft = ntohl(pinfo->prefered);
--
-- if (prefered_lft > valid_lft) {
-- if (net_ratelimit())
-- printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
-- return;
-- }
--
-- in6_dev = in6_dev_get(dev);
--
-- if (in6_dev == NULL) {
-- if (net_ratelimit())
-- printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
-- return;
-- }
--
-- /*
-- * Two things going on here:
-- * 1) Add routes for on-link prefixes
-- * 2) Configure prefixes with the auto flag set
-- */
--
-- /* Avoid arithmetic overflow. Really, we could
-- save rt_expires in seconds, likely valid_lft,
-- but it would require division in fib gc, that it
-- not good.
-- */
-- if (valid_lft >= 0x7FFFFFFF/HZ)
-- rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
-- else
-- rt_expires = valid_lft * HZ;
--
-- /*
-- * We convert this (in jiffies) to clock_t later.
-- * Avoid arithmetic overflow there as well.
-- * Overflow can happen only if HZ < USER_HZ.
-- */
-- if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
-- rt_expires = 0x7FFFFFFF / USER_HZ;
--
-- if (pinfo->onlink) {
-- struct rt6_info *rt;
-- rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
--
-- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-- if (rt->rt6i_flags&RTF_EXPIRES) {
-- if (valid_lft == 0) {
-- ip6_del_rt(rt);
-- rt = NULL;
-- } else {
-- rt->rt6i_expires = jiffies + rt_expires;
-- }
-- }
-- } else if (valid_lft) {
-- addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-- dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
-- }
-- if (rt)
-- dst_release(&rt->u.dst);
-- }
--
-- /* Try to figure out our local address for this prefix */
--
-- if (pinfo->autoconf && in6_dev->cnf.autoconf) {
-- struct inet6_ifaddr * ifp;
-- struct in6_addr addr;
-- int create = 0, update_lft = 0;
--
-- if (pinfo->prefix_len == 64) {
-- memcpy(&addr, &pinfo->prefix, 8);
-- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
-- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
-- in6_dev_put(in6_dev);
-- return;
-- }
-- goto ok;
-- }
-- if (net_ratelimit())
-- printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
-- pinfo->prefix_len);
-- in6_dev_put(in6_dev);
-- return;
--
--ok:
--
-- ifp = ipv6_get_ifaddr(&addr, dev, 1);
--
-- if (ifp == NULL && valid_lft) {
-- int max_addresses = in6_dev->cnf.max_addresses;
-- u32 addr_flags = 0;
--
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- if (in6_dev->cnf.optimistic_dad &&
-- !ipv6_devconf.forwarding)
-- addr_flags = IFA_F_OPTIMISTIC;
--#endif
--
-- /* Do not allow to create too much of autoconfigured
-- * addresses; this would be too easy way to crash kernel.
-- */
-- if (!max_addresses ||
-- ipv6_count_addresses(in6_dev) < max_addresses)
-- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
-- addr_type&IPV6_ADDR_SCOPE_MASK,
-- addr_flags);
--
-- if (!ifp || IS_ERR(ifp)) {
-- in6_dev_put(in6_dev);
-- return;
-- }
--
-- update_lft = create = 1;
-- ifp->cstamp = jiffies;
-- addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
-- }
--
-- if (ifp) {
-- int flags;
-- unsigned long now;
--#ifdef CONFIG_IPV6_PRIVACY
-- struct inet6_ifaddr *ift;
--#endif
-- u32 stored_lft;
--
-- /* update lifetime (RFC2462 5.5.3 e) */
-- spin_lock(&ifp->lock);
-- now = jiffies;
-- if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
-- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
-- else
-- stored_lft = 0;
-- if (!update_lft && stored_lft) {
-- if (valid_lft > MIN_VALID_LIFETIME ||
-- valid_lft > stored_lft)
-- update_lft = 1;
-- else if (stored_lft <= MIN_VALID_LIFETIME) {
-- /* valid_lft <= stored_lft is always true */
-- /* XXX: IPsec */
-- update_lft = 0;
-- } else {
-- valid_lft = MIN_VALID_LIFETIME;
-- if (valid_lft < prefered_lft)
-- prefered_lft = valid_lft;
-- update_lft = 1;
-- }
-- }
--
-- if (update_lft) {
-- ifp->valid_lft = valid_lft;
-- ifp->prefered_lft = prefered_lft;
-- ifp->tstamp = now;
-- flags = ifp->flags;
-- ifp->flags &= ~IFA_F_DEPRECATED;
-- spin_unlock(&ifp->lock);
--
-- if (!(flags&IFA_F_TENTATIVE))
-- ipv6_ifa_notify(0, ifp);
-- } else
-- spin_unlock(&ifp->lock);
--
--#ifdef CONFIG_IPV6_PRIVACY
-- read_lock_bh(&in6_dev->lock);
-- /* update all temporary addresses in the list */
-- for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
-- /*
-- * When adjusting the lifetimes of an existing
-- * temporary address, only lower the lifetimes.
-- * Implementations must not increase the
-- * lifetimes of an existing temporary address
-- * when processing a Prefix Information Option.
-- */
-- spin_lock(&ift->lock);
-- flags = ift->flags;
-- if (ift->valid_lft > valid_lft &&
-- ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
-- ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
-- if (ift->prefered_lft > prefered_lft &&
-- ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
-- ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
-- spin_unlock(&ift->lock);
-- if (!(flags&IFA_F_TENTATIVE))
-- ipv6_ifa_notify(0, ift);
-- }
--
-- if (create && in6_dev->cnf.use_tempaddr > 0) {
-- /*
-- * When a new public address is created as described in [ADDRCONF],
-- * also create a new temporary address.
-- */
-- read_unlock_bh(&in6_dev->lock);
-- ipv6_create_tempaddr(ifp, NULL);
-- } else {
-- read_unlock_bh(&in6_dev->lock);
-- }
--#endif
-- in6_ifa_put(ifp);
-- addrconf_verify(0);
-- }
-- }
-- inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
-- in6_dev_put(in6_dev);
--}
--
--/*
-- * Set destination address.
-- * Special case for SIT interfaces where we create a new "virtual"
-- * device.
-- */
--int addrconf_set_dstaddr(void __user *arg)
--{
-- struct in6_ifreq ireq;
-- struct net_device *dev;
-- int err = -EINVAL;
--
-- rtnl_lock();
--
-- err = -EFAULT;
-- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-- goto err_exit;
--
-- dev = __dev_get_by_index(ireq.ifr6_ifindex);
--
-- err = -ENODEV;
-- if (dev == NULL)
-- goto err_exit;
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- if (dev->type == ARPHRD_SIT) {
-- struct ifreq ifr;
-- mm_segment_t oldfs;
-- struct ip_tunnel_parm p;
--
-- err = -EADDRNOTAVAIL;
-- if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
-- goto err_exit;
--
-- memset(&p, 0, sizeof(p));
-- p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
-- p.iph.saddr = 0;
-- p.iph.version = 4;
-- p.iph.ihl = 5;
-- p.iph.protocol = IPPROTO_IPV6;
-- p.iph.ttl = 64;
-- ifr.ifr_ifru.ifru_data = (void __user *)&p;
--
-- oldfs = get_fs(); set_fs(KERNEL_DS);
-- err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
-- set_fs(oldfs);
--
-- if (err == 0) {
-- err = -ENOBUFS;
-- if ((dev = __dev_get_by_name(p.name)) == NULL)
-- goto err_exit;
-- err = dev_open(dev);
-- }
-- }
--#endif
--
--err_exit:
-- rtnl_unlock();
-- return err;
--}
--
--/*
-- * Manual configuration of address on an interface
-- */
--static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
-- __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
--{
-- struct inet6_ifaddr *ifp;
-- struct inet6_dev *idev;
-- struct net_device *dev;
-- int scope;
-- u32 flags = RTF_EXPIRES;
--
-- ASSERT_RTNL();
--
-- /* check the lifetime */
-- if (!valid_lft || prefered_lft > valid_lft)
-- return -EINVAL;
--
-- if ((dev = __dev_get_by_index(ifindex)) == NULL)
-- return -ENODEV;
--
-- if ((idev = addrconf_add_dev(dev)) == NULL)
-- return -ENOBUFS;
--
-- scope = ipv6_addr_scope(pfx);
--
-- if (valid_lft == INFINITY_LIFE_TIME) {
-- ifa_flags |= IFA_F_PERMANENT;
-- flags = 0;
-- } else if (valid_lft >= 0x7FFFFFFF/HZ)
-- valid_lft = 0x7FFFFFFF/HZ;
--
-- if (prefered_lft == 0)
-- ifa_flags |= IFA_F_DEPRECATED;
-- else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-- (prefered_lft != INFINITY_LIFE_TIME))
-- prefered_lft = 0x7FFFFFFF/HZ;
--
-- ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
--
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->valid_lft = valid_lft;
-- ifp->prefered_lft = prefered_lft;
-- ifp->tstamp = jiffies;
-- spin_unlock_bh(&ifp->lock);
--
-- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
-- jiffies_to_clock_t(valid_lft * HZ), flags);
-- /*
-- * Note that section 3.1 of RFC 4429 indicates
-- * that the Optimistic flag should not be set for
-- * manually configured addresses
-- */
-- addrconf_dad_start(ifp, 0);
-- in6_ifa_put(ifp);
-- addrconf_verify(0);
-- return 0;
-- }
--
-- return PTR_ERR(ifp);
--}
--
--static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
--{
-- struct inet6_ifaddr *ifp;
-- struct inet6_dev *idev;
-- struct net_device *dev;
--
-- if ((dev = __dev_get_by_index(ifindex)) == NULL)
-- return -ENODEV;
--
-- if ((idev = __in6_dev_get(dev)) == NULL)
-- return -ENXIO;
--
-- read_lock_bh(&idev->lock);
-- for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
-- if (ifp->prefix_len == plen &&
-- ipv6_addr_equal(pfx, &ifp->addr)) {
-- in6_ifa_hold(ifp);
-- read_unlock_bh(&idev->lock);
--
-- ipv6_del_addr(ifp);
--
-- /* If the last address is deleted administratively,
-- disable IPv6 on this interface.
-- */
-- if (idev->addr_list == NULL)
-- addrconf_ifdown(idev->dev, 1);
-- return 0;
-- }
-- }
-- read_unlock_bh(&idev->lock);
-- return -EADDRNOTAVAIL;
--}
--
--
--int addrconf_add_ifaddr(void __user *arg)
--{
-- struct in6_ifreq ireq;
-- int err;
--
-- if (!capable(CAP_NET_ADMIN))
-- return -EPERM;
--
-- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-- return -EFAULT;
--
-- rtnl_lock();
-- err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
-- IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
-- rtnl_unlock();
-- return err;
--}
--
--int addrconf_del_ifaddr(void __user *arg)
--{
-- struct in6_ifreq ireq;
-- int err;
--
-- if (!capable(CAP_NET_ADMIN))
-- return -EPERM;
--
-- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-- return -EFAULT;
--
-- rtnl_lock();
-- err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
-- rtnl_unlock();
-- return err;
--}
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
--static void sit_add_v4_addrs(struct inet6_dev *idev)
--{
-- struct inet6_ifaddr * ifp;
-- struct in6_addr addr;
-- struct net_device *dev;
-- int scope;
--
-- ASSERT_RTNL();
--
-- memset(&addr, 0, sizeof(struct in6_addr));
-- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
--
-- if (idev->dev->flags&IFF_POINTOPOINT) {
-- addr.s6_addr32[0] = htonl(0xfe800000);
-- scope = IFA_LINK;
-- } else {
-- scope = IPV6_ADDR_COMPATv4;
-- }
--
-- if (addr.s6_addr32[3]) {
-- ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT);
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->flags &= ~IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
-- in6_ifa_put(ifp);
-- }
-- return;
-- }
--
-- for_each_netdev(dev) {
-- struct in_device * in_dev = __in_dev_get_rtnl(dev);
-- if (in_dev && (dev->flags & IFF_UP)) {
-- struct in_ifaddr * ifa;
--
-- int flag = scope;
--
-- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
-- int plen;
--
-- addr.s6_addr32[3] = ifa->ifa_local;
--
-- if (ifa->ifa_scope == RT_SCOPE_LINK)
-- continue;
-- if (ifa->ifa_scope >= RT_SCOPE_HOST) {
-- if (idev->dev->flags&IFF_POINTOPOINT)
-- continue;
-- flag |= IFA_HOST;
-- }
-- if (idev->dev->flags&IFF_POINTOPOINT)
-- plen = 64;
-- else
-- plen = 96;
--
-- ifp = ipv6_add_addr(idev, &addr, plen, flag,
-- IFA_F_PERMANENT);
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->flags &= ~IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
-- in6_ifa_put(ifp);
-- }
-- }
-- }
-- }
--}
--#endif
--
--static void init_loopback(struct net_device *dev)
--{
-- struct inet6_dev *idev;
-- struct inet6_ifaddr * ifp;
--
-- /* ::1 */
--
-- ASSERT_RTNL();
--
-- if ((idev = ipv6_find_idev(dev)) == NULL) {
-- printk(KERN_DEBUG "init loopback: add_dev failed\n");
-- return;
-- }
--
-- ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT);
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->flags &= ~IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
-- in6_ifa_put(ifp);
-- }
--}
--
--static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
--{
-- struct inet6_ifaddr * ifp;
-- u32 addr_flags = IFA_F_PERMANENT;
--
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- if (idev->cnf.optimistic_dad &&
-- !ipv6_devconf.forwarding)
-- addr_flags |= IFA_F_OPTIMISTIC;
--#endif
--
--
-- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
-- if (!IS_ERR(ifp)) {
-- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
-- addrconf_dad_start(ifp, 0);
-- in6_ifa_put(ifp);
-- }
--}
--
--static void addrconf_dev_config(struct net_device *dev)
--{
-- struct in6_addr addr;
-- struct inet6_dev * idev;
--
-- ASSERT_RTNL();
--
-- if ((dev->type != ARPHRD_ETHER) &&
-- (dev->type != ARPHRD_FDDI) &&
-- (dev->type != ARPHRD_IEEE802_TR) &&
-- (dev->type != ARPHRD_ARCNET) &&
-- (dev->type != ARPHRD_INFINIBAND)) {
-- /* Alas, we support only Ethernet autoconfiguration. */
-- return;
-- }
--
-- idev = addrconf_add_dev(dev);
-- if (idev == NULL)
-- return;
--
-- memset(&addr, 0, sizeof(struct in6_addr));
-- addr.s6_addr32[0] = htonl(0xFE800000);
--
-- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
-- addrconf_add_linklocal(idev, &addr);
--}
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
--static void addrconf_sit_config(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- /*
-- * Configure the tunnel with one of our IPv4
-- * addresses... we should configure all of
-- * our v4 addrs in the tunnel
-- */
--
-- if ((idev = ipv6_find_idev(dev)) == NULL) {
-- printk(KERN_DEBUG "init sit: add_dev failed\n");
-- return;
-- }
--
-- sit_add_v4_addrs(idev);
--
-- if (dev->flags&IFF_POINTOPOINT) {
-- addrconf_add_mroute(dev);
-- addrconf_add_lroute(dev);
-- } else
-- sit_route_add(dev);
--}
--#endif
--
--static inline int
--ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
--{
-- struct in6_addr lladdr;
--
-- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
-- addrconf_add_linklocal(idev, &lladdr);
-- return 0;
-- }
-- return -1;
--}
--
--static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
--{
-- struct net_device *link_dev;
--
-- /* first try to inherit the link-local address from the link device */
-- if (idev->dev->iflink &&
-- (link_dev = __dev_get_by_index(idev->dev->iflink))) {
-- if (!ipv6_inherit_linklocal(idev, link_dev))
-- return;
-- }
-- /* then try to inherit it from any device */
-- for_each_netdev(link_dev) {
-- if (!ipv6_inherit_linklocal(idev, link_dev))
-- return;
-- }
-- printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
--}
--
--/*
-- * Autoconfigure tunnel with a link-local address so routing protocols,
-- * DHCPv6, MLD etc. can be run over the virtual link
-- */
--
--static void addrconf_ip6_tnl_config(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- if ((idev = addrconf_add_dev(dev)) == NULL) {
-- printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
-- return;
-- }
-- ip6_tnl_add_linklocal(idev);
--}
--
--static int addrconf_notify(struct notifier_block *this, unsigned long event,
-- void * data)
--{
-- struct net_device *dev = (struct net_device *) data;
-- struct inet6_dev *idev = __in6_dev_get(dev);
-- int run_pending = 0;
--
-- switch(event) {
-- case NETDEV_REGISTER:
-- if (!idev && dev->mtu >= IPV6_MIN_MTU) {
-- idev = ipv6_add_dev(dev);
-- if (!idev)
-- printk(KERN_WARNING "IPv6: add_dev failed for %s\n",
-- dev->name);
-- }
-- break;
-- case NETDEV_UP:
-- case NETDEV_CHANGE:
-- if (event == NETDEV_UP) {
-- if (!addrconf_qdisc_ok(dev)) {
-- /* device is not ready yet. */
-- printk(KERN_INFO
-- "ADDRCONF(NETDEV_UP): %s: "
-- "link is not ready\n",
-- dev->name);
-- break;
-- }
--
-- if (idev)
-- idev->if_flags |= IF_READY;
-- } else {
-- if (!addrconf_qdisc_ok(dev)) {
-- /* device is still not ready. */
-- break;
-- }
--
-- if (idev) {
-- if (idev->if_flags & IF_READY) {
-- /* device is already configured. */
-- break;
-- }
-- idev->if_flags |= IF_READY;
-- }
--
-- printk(KERN_INFO
-- "ADDRCONF(NETDEV_CHANGE): %s: "
-- "link becomes ready\n",
-- dev->name);
--
-- run_pending = 1;
-- }
--
-- switch(dev->type) {
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- case ARPHRD_SIT:
-- addrconf_sit_config(dev);
-- break;
--#endif
-- case ARPHRD_TUNNEL6:
-- addrconf_ip6_tnl_config(dev);
-- break;
-- case ARPHRD_LOOPBACK:
-- init_loopback(dev);
-- break;
--
-- default:
-- addrconf_dev_config(dev);
-- break;
-- }
-- if (idev) {
-- if (run_pending)
-- addrconf_dad_run(idev);
--
-- /* If the MTU changed during the interface down, when the
-- interface up, the changed MTU must be reflected in the
-- idev as well as routers.
-- */
-- if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
-- rt6_mtu_change(dev, dev->mtu);
-- idev->cnf.mtu6 = dev->mtu;
-- }
-- idev->tstamp = jiffies;
-- inet6_ifinfo_notify(RTM_NEWLINK, idev);
-- /* If the changed mtu during down is lower than IPV6_MIN_MTU
-- stop IPv6 on this interface.
-- */
-- if (dev->mtu < IPV6_MIN_MTU)
-- addrconf_ifdown(dev, event != NETDEV_DOWN);
-- }
-- break;
--
-- case NETDEV_CHANGEMTU:
-- if ( idev && dev->mtu >= IPV6_MIN_MTU) {
-- rt6_mtu_change(dev, dev->mtu);
-- idev->cnf.mtu6 = dev->mtu;
-- break;
-- }
--
-- /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
--
-- case NETDEV_DOWN:
-- case NETDEV_UNREGISTER:
-- /*
-- * Remove all addresses from this interface.
-- */
-- addrconf_ifdown(dev, event != NETDEV_DOWN);
-- break;
--
-- case NETDEV_CHANGENAME:
-- if (idev) {
-- snmp6_unregister_dev(idev);
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl_unregister(&idev->cnf);
-- neigh_sysctl_unregister(idev->nd_parms);
-- neigh_sysctl_register(dev, idev->nd_parms,
-- NET_IPV6, NET_IPV6_NEIGH, "ipv6",
-- &ndisc_ifinfo_sysctl_change,
-- NULL);
-- addrconf_sysctl_register(idev, &idev->cnf);
--#endif
-- snmp6_register_dev(idev);
-- }
-- break;
-- }
--
-- return NOTIFY_OK;
--}
--
--/*
-- * addrconf module should be notified of a device going up
-- */
--static struct notifier_block ipv6_dev_notf = {
-- .notifier_call = addrconf_notify,
-- .priority = 0
--};
--
--static int addrconf_ifdown(struct net_device *dev, int how)
--{
-- struct inet6_dev *idev;
-- struct inet6_ifaddr *ifa, **bifa;
-- int i;
--
-- ASSERT_RTNL();
--
++ for_each_netdev(&init_net, dev) {
+ struct in_device * in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev && (dev->flags & IFF_UP)) {
+ struct in_ifaddr * ifa;
+@@ -2245,12 +2246,12 @@
+
+ /* first try to inherit the link-local address from the link device */
+ if (idev->dev->iflink &&
+- (link_dev = __dev_get_by_index(idev->dev->iflink))) {
++ (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) {
+ if (!ipv6_inherit_linklocal(idev, link_dev))
+ return;
+ }
+ /* then try to inherit it from any device */
+- for_each_netdev(link_dev) {
++ for_each_netdev(&init_net, link_dev) {
+ if (!ipv6_inherit_linklocal(idev, link_dev))
+ return;
+ }
+@@ -2282,6 +2283,9 @@
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ int run_pending = 0;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch(event) {
+ case NETDEV_REGISTER:
+ if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+@@ -2419,7 +2423,7 @@
+
+ ASSERT_RTNL();
+
- if (dev == &loopback_dev && how == 1)
-- how = 0;
--
-- rt6_ifdown(dev);
-- neigh_ifdown(&nd_tbl, dev);
--
-- idev = __in6_dev_get(dev);
-- if (idev == NULL)
-- return -ENODEV;
--
-- /* Step 1: remove reference to ipv6 device from parent device.
-- Do not dev_put!
-- */
-- if (how == 1) {
-- idev->dead = 1;
--
-- /* protected by rtnl_lock */
-- rcu_assign_pointer(dev->ip6_ptr, NULL);
--
-- /* Step 1.5: remove snmp6 entry */
-- snmp6_unregister_dev(idev);
--
-- }
--
-- /* Step 2: clear hash table */
-- for (i=0; i<IN6_ADDR_HSIZE; i++) {
-- bifa = &inet6_addr_lst[i];
--
-- write_lock_bh(&addrconf_hash_lock);
-- while ((ifa = *bifa) != NULL) {
-- if (ifa->idev == idev) {
-- *bifa = ifa->lst_next;
-- ifa->lst_next = NULL;
-- addrconf_del_timer(ifa);
-- in6_ifa_put(ifa);
-- continue;
-- }
-- bifa = &ifa->lst_next;
-- }
-- write_unlock_bh(&addrconf_hash_lock);
-- }
--
-- write_lock_bh(&idev->lock);
--
-- /* Step 3: clear flags for stateless addrconf */
-- if (how != 1)
-- idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
--
-- /* Step 4: clear address list */
--#ifdef CONFIG_IPV6_PRIVACY
-- if (how == 1 && del_timer(&idev->regen_timer))
-- in6_dev_put(idev);
--
-- /* clear tempaddr list */
-- while ((ifa = idev->tempaddr_list) != NULL) {
-- idev->tempaddr_list = ifa->tmp_next;
-- ifa->tmp_next = NULL;
-- ifa->dead = 1;
-- write_unlock_bh(&idev->lock);
-- spin_lock_bh(&ifa->lock);
--
-- if (ifa->ifpub) {
-- in6_ifa_put(ifa->ifpub);
-- ifa->ifpub = NULL;
-- }
-- spin_unlock_bh(&ifa->lock);
-- in6_ifa_put(ifa);
-- write_lock_bh(&idev->lock);
-- }
--#endif
-- while ((ifa = idev->addr_list) != NULL) {
-- idev->addr_list = ifa->if_next;
-- ifa->if_next = NULL;
-- ifa->dead = 1;
-- addrconf_del_timer(ifa);
-- write_unlock_bh(&idev->lock);
--
-- __ipv6_ifa_notify(RTM_DELADDR, ifa);
-- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
-- in6_ifa_put(ifa);
--
-- write_lock_bh(&idev->lock);
-- }
-- write_unlock_bh(&idev->lock);
--
-- /* Step 5: Discard multicast list */
--
-- if (how == 1)
-- ipv6_mc_destroy_dev(idev);
-- else
-- ipv6_mc_down(idev);
--
-- /* Step 5: netlink notification of this interface */
-- idev->tstamp = jiffies;
-- inet6_ifinfo_notify(RTM_DELLINK, idev);
--
-- /* Shot the device (if unregistered) */
--
-- if (how == 1) {
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl_unregister(&idev->cnf);
-- neigh_sysctl_unregister(idev->nd_parms);
--#endif
-- neigh_parms_release(&nd_tbl, idev->nd_parms);
-- neigh_ifdown(&nd_tbl, dev);
-- in6_dev_put(idev);
-- }
-- return 0;
--}
--
--static void addrconf_rs_timer(unsigned long data)
--{
-- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
--
-- if (ifp->idev->cnf.forwarding)
-- goto out;
--
-- if (ifp->idev->if_flags & IF_RA_RCVD) {
-- /*
-- * Announcement received after solicitation
-- * was sent
-- */
-- goto out;
-- }
--
-- spin_lock(&ifp->lock);
-- if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
-- struct in6_addr all_routers;
--
-- /* The wait after the last probe can be shorter */
-- addrconf_mod_timer(ifp, AC_RS,
-- (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
-- ifp->idev->cnf.rtr_solicit_delay :
-- ifp->idev->cnf.rtr_solicit_interval);
-- spin_unlock(&ifp->lock);
--
-- ipv6_addr_all_routers(&all_routers);
--
-- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
-- } else {
-- spin_unlock(&ifp->lock);
-- /*
-- * Note: we do not support deprecated "all on-link"
-- * assumption any longer.
-- */
-- printk(KERN_DEBUG "%s: no IPv6 routers present\n",
-- ifp->idev->dev->name);
-- }
--
--out:
-- in6_ifa_put(ifp);
--}
--
--/*
-- * Duplicate Address Detection
-- */
--static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
--{
-- unsigned long rand_num;
-- struct inet6_dev *idev = ifp->idev;
--
-- if (ifp->flags & IFA_F_OPTIMISTIC)
-- rand_num = 0;
-- else
-- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
--
-- ifp->probes = idev->cnf.dad_transmits;
-- addrconf_mod_timer(ifp, AC_DAD, rand_num);
--}
--
--static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
--{
-- struct inet6_dev *idev = ifp->idev;
-- struct net_device *dev = idev->dev;
--
-- addrconf_join_solict(dev, &ifp->addr);
--
-- net_srandom(ifp->addr.s6_addr32[3]);
--
-- read_lock_bh(&idev->lock);
-- if (ifp->dead)
-- goto out;
-- spin_lock_bh(&ifp->lock);
--
-- if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-- !(ifp->flags&IFA_F_TENTATIVE) ||
-- ifp->flags & IFA_F_NODAD) {
-- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
--
-- addrconf_dad_completed(ifp);
-- return;
-- }
--
-- if (!(idev->if_flags & IF_READY)) {
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
-- /*
-- * If the defice is not ready:
-- * - keep it tentative if it is a permanent address.
-- * - otherwise, kill it.
-- */
-- in6_ifa_hold(ifp);
-- addrconf_dad_stop(ifp);
-- return;
-- }
--
-- /*
-- * Optimistic nodes can start receiving
-- * Frames right away
-- */
-- if(ifp->flags & IFA_F_OPTIMISTIC)
-- ip6_ins_rt(ifp->rt);
--
-- addrconf_dad_kick(ifp);
-- spin_unlock_bh(&ifp->lock);
--out:
-- read_unlock_bh(&idev->lock);
--}
--
--static void addrconf_dad_timer(unsigned long data)
--{
-- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
-- struct inet6_dev *idev = ifp->idev;
-- struct in6_addr unspec;
-- struct in6_addr mcaddr;
--
-- read_lock_bh(&idev->lock);
-- if (idev->dead) {
-- read_unlock_bh(&idev->lock);
-- goto out;
-- }
-- spin_lock_bh(&ifp->lock);
-- if (ifp->probes == 0) {
-- /*
-- * DAD was successful
-- */
--
-- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
--
-- addrconf_dad_completed(ifp);
--
-- goto out;
-- }
--
-- ifp->probes--;
-- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
--
-- /* send a neighbour solicitation for our addr */
-- memset(&unspec, 0, sizeof(unspec));
-- addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
--out:
-- in6_ifa_put(ifp);
--}
--
--static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
--{
-- struct net_device * dev = ifp->idev->dev;
--
-- /*
-- * Configure the address for reception. Now it is valid.
-- */
--
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
--
-- /* If added prefix is link local and forwarding is off,
-- start sending router solicitations.
-- */
--
-- if (ifp->idev->cnf.forwarding == 0 &&
-- ifp->idev->cnf.rtr_solicits > 0 &&
-- (dev->flags&IFF_LOOPBACK) == 0 &&
-- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
-- struct in6_addr all_routers;
--
-- ipv6_addr_all_routers(&all_routers);
--
-- /*
-- * If a host as already performed a random delay
-- * [...] as part of DAD [...] there is no need
-- * to delay again before sending the first RS
-- */
-- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
--
-- spin_lock_bh(&ifp->lock);
-- ifp->probes = 1;
-- ifp->idev->if_flags |= IF_RS_SENT;
-- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
-- spin_unlock_bh(&ifp->lock);
-- }
--}
--
--static void addrconf_dad_run(struct inet6_dev *idev) {
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
-- spin_lock_bh(&ifp->lock);
-- if (!(ifp->flags & IFA_F_TENTATIVE)) {
-- spin_unlock_bh(&ifp->lock);
-- continue;
-- }
-- spin_unlock_bh(&ifp->lock);
-- addrconf_dad_kick(ifp);
-- }
-- read_unlock_bh(&idev->lock);
--}
--
--#ifdef CONFIG_PROC_FS
--struct if6_iter_state {
-- int bucket;
--};
--
--static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
--{
-- struct inet6_ifaddr *ifa = NULL;
-- struct if6_iter_state *state = seq->private;
--
-- for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
-- ifa = inet6_addr_lst[state->bucket];
-- if (ifa)
-- break;
-- }
-- return ifa;
--}
--
--static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
--{
-- struct if6_iter_state *state = seq->private;
--
-- ifa = ifa->lst_next;
--try_again:
-- if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
-- ifa = inet6_addr_lst[state->bucket];
-- goto try_again;
-- }
-- return ifa;
--}
--
--static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
--{
-- struct inet6_ifaddr *ifa = if6_get_first(seq);
--
-- if (ifa)
-- while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
-- --pos;
-- return pos ? NULL : ifa;
--}
--
--static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
--{
-- read_lock_bh(&addrconf_hash_lock);
-- return if6_get_idx(seq, *pos);
--}
--
--static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
--{
-- struct inet6_ifaddr *ifa;
--
-- ifa = if6_get_next(seq, v);
-- ++*pos;
-- return ifa;
--}
--
--static void if6_seq_stop(struct seq_file *seq, void *v)
--{
-- read_unlock_bh(&addrconf_hash_lock);
--}
--
--static int if6_seq_show(struct seq_file *seq, void *v)
--{
-- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
-- seq_printf(seq,
-- NIP6_SEQFMT " %02x %02x %02x %02x %8s\n",
-- NIP6(ifp->addr),
-- ifp->idev->dev->ifindex,
-- ifp->prefix_len,
-- ifp->scope,
-- ifp->flags,
-- ifp->idev->dev->name);
-- return 0;
--}
--
--static struct seq_operations if6_seq_ops = {
-- .start = if6_seq_start,
-- .next = if6_seq_next,
-- .show = if6_seq_show,
-- .stop = if6_seq_stop,
--};
--
--static int if6_seq_open(struct inode *inode, struct file *file)
--{
-- struct seq_file *seq;
-- int rc = -ENOMEM;
-- struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
--
-- if (!s)
-- goto out;
--
-- rc = seq_open(file, &if6_seq_ops);
-- if (rc)
-- goto out_kfree;
--
-- seq = file->private_data;
-- seq->private = s;
--out:
-- return rc;
--out_kfree:
-- kfree(s);
-- goto out;
--}
--
--static const struct file_operations if6_fops = {
-- .owner = THIS_MODULE,
-- .open = if6_seq_open,
-- .read = seq_read,
-- .llseek = seq_lseek,
-- .release = seq_release_private,
--};
--
--int __init if6_proc_init(void)
--{
++ if (dev == &init_net.loopback_dev && how == 1)
+ how = 0;
+
+ rt6_ifdown(dev);
+@@ -2850,18 +2854,18 @@
+
+ int __init if6_proc_init(void)
+ {
- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
-- return -ENOMEM;
-- return 0;
--}
--
--void if6_proc_exit(void)
--{
++ if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
+ void if6_proc_exit(void)
+ {
- proc_net_remove("if_inet6");
--}
--#endif /* CONFIG_PROC_FS */
--
++ proc_net_remove(&init_net, "if_inet6");
+ }
+ #endif /* CONFIG_PROC_FS */
+
-#ifdef CONFIG_IPV6_MIP6
--/* Check if address is a home address configured on any interface. */
--int ipv6_chk_home_addr(struct in6_addr *addr)
--{
-- int ret = 0;
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
-- read_lock_bh(&addrconf_hash_lock);
-- for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
-- if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
-- (ifp->flags & IFA_F_HOMEADDRESS)) {
-- ret = 1;
-- break;
-- }
-- }
-- read_unlock_bh(&addrconf_hash_lock);
-- return ret;
--}
--#endif
--
--/*
-- * Periodic address status verification
-- */
--
--static void addrconf_verify(unsigned long foo)
--{
-- struct inet6_ifaddr *ifp;
-- unsigned long now, next;
-- int i;
--
-- spin_lock_bh(&addrconf_verify_lock);
-- now = jiffies;
-- next = now + ADDR_CHECK_FREQUENCY;
--
-- del_timer(&addr_chk_timer);
--
-- for (i=0; i < IN6_ADDR_HSIZE; i++) {
--
--restart:
-- read_lock(&addrconf_hash_lock);
-- for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
-- unsigned long age;
--#ifdef CONFIG_IPV6_PRIVACY
-- unsigned long regen_advance;
--#endif
--
-- if (ifp->flags & IFA_F_PERMANENT)
-- continue;
--
-- spin_lock(&ifp->lock);
-- age = (now - ifp->tstamp) / HZ;
--
--#ifdef CONFIG_IPV6_PRIVACY
-- regen_advance = ifp->idev->cnf.regen_max_retry *
-- ifp->idev->cnf.dad_transmits *
-- ifp->idev->nd_parms->retrans_time / HZ;
--#endif
--
-- if (ifp->valid_lft != INFINITY_LIFE_TIME &&
-- age >= ifp->valid_lft) {
-- spin_unlock(&ifp->lock);
-- in6_ifa_hold(ifp);
-- read_unlock(&addrconf_hash_lock);
-- ipv6_del_addr(ifp);
-- goto restart;
-- } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
-- spin_unlock(&ifp->lock);
-- continue;
-- } else if (age >= ifp->prefered_lft) {
-- /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
-- int deprecate = 0;
--
-- if (!(ifp->flags&IFA_F_DEPRECATED)) {
-- deprecate = 1;
-- ifp->flags |= IFA_F_DEPRECATED;
-- }
--
-- if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
-- next = ifp->tstamp + ifp->valid_lft * HZ;
--
-- spin_unlock(&ifp->lock);
--
-- if (deprecate) {
-- in6_ifa_hold(ifp);
-- read_unlock(&addrconf_hash_lock);
--
-- ipv6_ifa_notify(0, ifp);
-- in6_ifa_put(ifp);
-- goto restart;
-- }
--#ifdef CONFIG_IPV6_PRIVACY
-- } else if ((ifp->flags&IFA_F_TEMPORARY) &&
-- !(ifp->flags&IFA_F_TENTATIVE)) {
-- if (age >= ifp->prefered_lft - regen_advance) {
-- struct inet6_ifaddr *ifpub = ifp->ifpub;
-- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
-- next = ifp->tstamp + ifp->prefered_lft * HZ;
-- if (!ifp->regen_count && ifpub) {
-- ifp->regen_count++;
-- in6_ifa_hold(ifp);
-- in6_ifa_hold(ifpub);
-- spin_unlock(&ifp->lock);
-- read_unlock(&addrconf_hash_lock);
-- spin_lock(&ifpub->lock);
-- ifpub->regen_count = 0;
-- spin_unlock(&ifpub->lock);
-- ipv6_create_tempaddr(ifpub, ifp);
-- in6_ifa_put(ifpub);
-- in6_ifa_put(ifp);
-- goto restart;
-- }
-- } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
-- next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
-- spin_unlock(&ifp->lock);
--#endif
-- } else {
-- /* ifp->prefered_lft <= ifp->valid_lft */
-- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
-- next = ifp->tstamp + ifp->prefered_lft * HZ;
-- spin_unlock(&ifp->lock);
-- }
-- }
-- read_unlock(&addrconf_hash_lock);
-- }
--
-- addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
-- add_timer(&addr_chk_timer);
-- spin_unlock_bh(&addrconf_verify_lock);
--}
--
--static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
--{
-- struct in6_addr *pfx = NULL;
--
-- if (addr)
-- pfx = nla_data(addr);
--
-- if (local) {
-- if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
-- pfx = NULL;
-- else
-- pfx = nla_data(local);
-- }
--
-- return pfx;
--}
--
--static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
-- [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
-- [IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
-- [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
--};
--
--static int
--inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
--{
-- struct ifaddrmsg *ifm;
-- struct nlattr *tb[IFA_MAX+1];
-- struct in6_addr *pfx;
-- int err;
--
-- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
-- if (err < 0)
-- return err;
--
-- ifm = nlmsg_data(nlh);
-- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
-- if (pfx == NULL)
-- return -EINVAL;
--
-- return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
--}
--
--static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
-- u32 prefered_lft, u32 valid_lft)
--{
-- u32 flags = RTF_EXPIRES;
--
-- if (!valid_lft || (prefered_lft > valid_lft))
-- return -EINVAL;
--
-- if (valid_lft == INFINITY_LIFE_TIME) {
-- ifa_flags |= IFA_F_PERMANENT;
-- flags = 0;
-- } else if (valid_lft >= 0x7FFFFFFF/HZ)
-- valid_lft = 0x7FFFFFFF/HZ;
--
-- if (prefered_lft == 0)
-- ifa_flags |= IFA_F_DEPRECATED;
-- else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-- (prefered_lft != INFINITY_LIFE_TIME))
-- prefered_lft = 0x7FFFFFFF/HZ;
--
-- spin_lock_bh(&ifp->lock);
-- ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
-- ifp->tstamp = jiffies;
-- ifp->valid_lft = valid_lft;
-- ifp->prefered_lft = prefered_lft;
--
-- spin_unlock_bh(&ifp->lock);
-- if (!(ifp->flags&IFA_F_TENTATIVE))
-- ipv6_ifa_notify(0, ifp);
--
-- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
-- jiffies_to_clock_t(valid_lft * HZ), flags);
-- addrconf_verify(0);
--
-- return 0;
--}
--
--static int
--inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
--{
-- struct ifaddrmsg *ifm;
-- struct nlattr *tb[IFA_MAX+1];
-- struct in6_addr *pfx;
-- struct inet6_ifaddr *ifa;
-- struct net_device *dev;
-- u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
-- u8 ifa_flags;
-- int err;
--
-- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
-- if (err < 0)
-- return err;
--
-- ifm = nlmsg_data(nlh);
-- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
-- if (pfx == NULL)
-- return -EINVAL;
--
-- if (tb[IFA_CACHEINFO]) {
-- struct ifa_cacheinfo *ci;
--
-- ci = nla_data(tb[IFA_CACHEINFO]);
-- valid_lft = ci->ifa_valid;
-- preferred_lft = ci->ifa_prefered;
-- } else {
-- preferred_lft = INFINITY_LIFE_TIME;
-- valid_lft = INFINITY_LIFE_TIME;
-- }
--
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ /* Check if address is a home address configured on any interface. */
+ int ipv6_chk_home_addr(struct in6_addr *addr)
+ {
+@@ -3017,11 +3021,15 @@
+ static int
+ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *pfx;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+@@ -3074,6 +3082,7 @@
+ static int
+ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *pfx;
+@@ -3083,6 +3092,9 @@
+ u8 ifa_flags;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+@@ -3103,7 +3115,7 @@
+ valid_lft = INFINITY_LIFE_TIME;
+ }
+
- dev = __dev_get_by_index(ifm->ifa_index);
-- if (dev == NULL)
-- return -ENODEV;
--
-- /* We ignore other flags so far. */
-- ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
--
-- ifa = ipv6_get_ifaddr(pfx, dev, 1);
-- if (ifa == NULL) {
-- /*
-- * It would be best to check for !NLM_F_CREATE here but
-- * userspace alreay relies on not having to provide this.
-- */
-- return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
-- ifa_flags, preferred_lft, valid_lft);
-- }
--
-- if (nlh->nlmsg_flags & NLM_F_EXCL ||
-- !(nlh->nlmsg_flags & NLM_F_REPLACE))
-- err = -EEXIST;
-- else
-- err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
--
-- in6_ifa_put(ifa);
--
-- return err;
--}
--
--static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
-- u8 scope, int ifindex)
--{
-- struct ifaddrmsg *ifm;
--
-- ifm = nlmsg_data(nlh);
-- ifm->ifa_family = AF_INET6;
-- ifm->ifa_prefixlen = prefixlen;
-- ifm->ifa_flags = flags;
-- ifm->ifa_scope = scope;
-- ifm->ifa_index = ifindex;
--}
--
--static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
-- unsigned long tstamp, u32 preferred, u32 valid)
--{
-- struct ifa_cacheinfo ci;
--
-- ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
-- + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-- ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
-- + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-- ci.ifa_prefered = preferred;
-- ci.ifa_valid = valid;
--
-- return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
--}
--
--static inline int rt_scope(int ifa_scope)
--{
-- if (ifa_scope & IFA_HOST)
-- return RT_SCOPE_HOST;
-- else if (ifa_scope & IFA_LINK)
-- return RT_SCOPE_LINK;
-- else if (ifa_scope & IFA_SITE)
-- return RT_SCOPE_SITE;
-- else
-- return RT_SCOPE_UNIVERSE;
--}
--
--static inline int inet6_ifaddr_msgsize(void)
--{
-- return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
-- + nla_total_size(16) /* IFA_ADDRESS */
-- + nla_total_size(sizeof(struct ifa_cacheinfo));
--}
--
--static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
-- u32 pid, u32 seq, int event, unsigned int flags)
--{
-- struct nlmsghdr *nlh;
-- u32 preferred, valid;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
-- ifa->idev->dev->ifindex);
--
-- if (!(ifa->flags&IFA_F_PERMANENT)) {
-- preferred = ifa->prefered_lft;
-- valid = ifa->valid_lft;
-- if (preferred != INFINITY_LIFE_TIME) {
-- long tval = (jiffies - ifa->tstamp)/HZ;
-- preferred -= tval;
-- if (valid != INFINITY_LIFE_TIME)
-- valid -= tval;
-- }
-- } else {
-- preferred = INFINITY_LIFE_TIME;
-- valid = INFINITY_LIFE_TIME;
-- }
--
-- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
-- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
-- }
--
-- return nlmsg_end(skb, nlh);
--}
--
--static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
-- u32 pid, u32 seq, int event, u16 flags)
--{
-- struct nlmsghdr *nlh;
-- u8 scope = RT_SCOPE_UNIVERSE;
-- int ifindex = ifmca->idev->dev->ifindex;
--
-- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
-- scope = RT_SCOPE_SITE;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-- if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
-- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
-- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
-- }
--
-- return nlmsg_end(skb, nlh);
--}
--
--static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
-- u32 pid, u32 seq, int event, unsigned int flags)
--{
-- struct nlmsghdr *nlh;
-- u8 scope = RT_SCOPE_UNIVERSE;
-- int ifindex = ifaca->aca_idev->dev->ifindex;
--
-- if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
-- scope = RT_SCOPE_SITE;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-- if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
-- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
-- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
-- }
--
-- return nlmsg_end(skb, nlh);
--}
--
--enum addr_type_t
--{
-- UNICAST_ADDR,
-- MULTICAST_ADDR,
-- ANYCAST_ADDR,
--};
--
--static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
-- enum addr_type_t type)
--{
-- int idx, ip_idx;
-- int s_idx, s_ip_idx;
-- int err = 1;
-- struct net_device *dev;
-- struct inet6_dev *idev = NULL;
-- struct inet6_ifaddr *ifa;
-- struct ifmcaddr6 *ifmca;
-- struct ifacaddr6 *ifaca;
--
-- s_idx = cb->args[0];
-- s_ip_idx = ip_idx = cb->args[1];
--
-- idx = 0;
++ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+ if (dev == NULL)
+ return -ENODEV;
+
+@@ -3292,7 +3304,7 @@
+ s_ip_idx = ip_idx = cb->args[1];
+
+ idx = 0;
- for_each_netdev(dev) {
-- if (idx < s_idx)
-- goto cont;
-- if (idx > s_idx)
-- s_ip_idx = 0;
-- ip_idx = 0;
-- if ((idev = in6_dev_get(dev)) == NULL)
-- goto cont;
-- read_lock_bh(&idev->lock);
-- switch (type) {
-- case UNICAST_ADDR:
-- /* unicast address incl. temp addr */
-- for (ifa = idev->addr_list; ifa;
-- ifa = ifa->if_next, ip_idx++) {
-- if (ip_idx < s_ip_idx)
-- continue;
-- if ((err = inet6_fill_ifaddr(skb, ifa,
-- NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_NEWADDR,
-- NLM_F_MULTI)) <= 0)
-- goto done;
-- }
-- break;
-- case MULTICAST_ADDR:
-- /* multicast address */
-- for (ifmca = idev->mc_list; ifmca;
-- ifmca = ifmca->next, ip_idx++) {
-- if (ip_idx < s_ip_idx)
-- continue;
-- if ((err = inet6_fill_ifmcaddr(skb, ifmca,
-- NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
-- NLM_F_MULTI)) <= 0)
-- goto done;
-- }
-- break;
-- case ANYCAST_ADDR:
-- /* anycast address */
-- for (ifaca = idev->ac_list; ifaca;
-- ifaca = ifaca->aca_next, ip_idx++) {
-- if (ip_idx < s_ip_idx)
-- continue;
-- if ((err = inet6_fill_ifacaddr(skb, ifaca,
-- NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_GETANYCAST,
-- NLM_F_MULTI)) <= 0)
-- goto done;
-- }
-- break;
-- default:
-- break;
-- }
-- read_unlock_bh(&idev->lock);
-- in6_dev_put(idev);
--cont:
-- idx++;
-- }
--done:
-- if (err <= 0) {
-- read_unlock_bh(&idev->lock);
-- in6_dev_put(idev);
-- }
-- cb->args[0] = idx;
-- cb->args[1] = ip_idx;
-- return skb->len;
--}
--
--static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- enum addr_type_t type = UNICAST_ADDR;
-- return inet6_dump_addr(skb, cb, type);
--}
--
--static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- enum addr_type_t type = MULTICAST_ADDR;
-- return inet6_dump_addr(skb, cb, type);
--}
--
--
--static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- enum addr_type_t type = ANYCAST_ADDR;
-- return inet6_dump_addr(skb, cb, type);
--}
--
--static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
-- void *arg)
--{
-- struct ifaddrmsg *ifm;
-- struct nlattr *tb[IFA_MAX+1];
-- struct in6_addr *addr = NULL;
-- struct net_device *dev = NULL;
-- struct inet6_ifaddr *ifa;
-- struct sk_buff *skb;
-- int err;
--
-- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
-- if (err < 0)
-- goto errout;
--
-- addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
-- if (addr == NULL) {
-- err = -EINVAL;
-- goto errout;
-- }
--
-- ifm = nlmsg_data(nlh);
-- if (ifm->ifa_index)
++ for_each_netdev(&init_net, dev) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+@@ -3367,26 +3379,42 @@
+
+ static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ enum addr_type_t type = UNICAST_ADDR;
++
++ if (net != &init_net)
++ return 0;
++
+ return inet6_dump_addr(skb, cb, type);
+ }
+
+ static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ enum addr_type_t type = MULTICAST_ADDR;
++
++ if (net != &init_net)
++ return 0;
++
+ return inet6_dump_addr(skb, cb, type);
+ }
+
+
+ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ enum addr_type_t type = ANYCAST_ADDR;
++
++ if (net != &init_net)
++ return 0;
++
+ return inet6_dump_addr(skb, cb, type);
+ }
+
+ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+ void *arg)
+ {
++ struct net *net = in_skb->sk->sk_net;
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *addr = NULL;
+@@ -3395,6 +3423,9 @@
+ struct sk_buff *skb;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ goto errout;
+@@ -3407,7 +3438,7 @@
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_index)
- dev = __dev_get_by_index(ifm->ifa_index);
--
-- if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
-- err = -EADDRNOTAVAIL;
-- goto errout;
-- }
--
-- if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
-- err = -ENOBUFS;
-- goto errout_ifa;
-- }
--
-- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
-- nlh->nlmsg_seq, RTM_NEWADDR, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout_ifa;
-- }
++ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+
+ if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+ err = -EADDRNOTAVAIL;
+@@ -3427,7 +3458,7 @@
+ kfree_skb(skb);
+ goto errout_ifa;
+ }
- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
--errout_ifa:
-- in6_ifa_put(ifa);
--errout:
-- return err;
--}
--
--static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
--{
-- struct sk_buff *skb;
-- int err = -ENOBUFS;
--
-- skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
-- if (skb == NULL)
-- goto errout;
--
-- err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout;
-- }
++ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ errout_ifa:
+ in6_ifa_put(ifa);
+ errout:
+@@ -3450,10 +3481,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
--errout:
-- if (err < 0)
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
--}
--
--static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
-- __s32 *array, int bytes)
--{
-- BUG_ON(bytes < (DEVCONF_MAX * 4));
--
-- memset(array, 0, bytes);
-- array[DEVCONF_FORWARDING] = cnf->forwarding;
-- array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
-- array[DEVCONF_MTU6] = cnf->mtu6;
-- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
-- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
-- array[DEVCONF_AUTOCONF] = cnf->autoconf;
-- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
-- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
-- array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
-- array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
-- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
--#ifdef CONFIG_IPV6_PRIVACY
-- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
-- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
-- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
-- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
-- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
--#endif
-- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
-- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
-- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
-- array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval;
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
--#endif
--#endif
-- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
-- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
--#endif
--}
--
--static inline size_t inet6_if_nlmsg_size(void)
--{
-- return NLMSG_ALIGN(sizeof(struct ifinfomsg))
-- + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
-- + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
-- + nla_total_size(4) /* IFLA_MTU */
-- + nla_total_size(4) /* IFLA_LINK */
-- + nla_total_size( /* IFLA_PROTINFO */
-- nla_total_size(4) /* IFLA_INET6_FLAGS */
-- + nla_total_size(sizeof(struct ifla_cacheinfo))
-- + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
-- + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
-- + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-- );
--}
--
--static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
-- int bytes)
--{
-- int i;
-- int pad = bytes - sizeof(u64) * items;
-- BUG_ON(pad < 0);
--
-- /* Use put_unaligned() because stats may not be aligned for u64. */
-- put_unaligned(items, &stats[0]);
-- for (i = 1; i < items; i++)
-- put_unaligned(snmp_fold_field(mib, i), &stats[i]);
--
-- memset(&stats[items], 0, pad);
--}
--
--static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
-- int bytes)
--{
-- switch(attrtype) {
-- case IFLA_INET6_STATS:
-- __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
-- break;
-- case IFLA_INET6_ICMP6STATS:
-- __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
-- break;
-- }
--}
--
--static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
-- u32 pid, u32 seq, int event, unsigned int flags)
--{
-- struct net_device *dev = idev->dev;
-- struct nlattr *nla;
-- struct ifinfomsg *hdr;
-- struct nlmsghdr *nlh;
-- void *protoinfo;
-- struct ifla_cacheinfo ci;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- hdr = nlmsg_data(nlh);
-- hdr->ifi_family = AF_INET6;
-- hdr->__ifi_pad = 0;
-- hdr->ifi_type = dev->type;
-- hdr->ifi_index = dev->ifindex;
-- hdr->ifi_flags = dev_get_flags(dev);
-- hdr->ifi_change = 0;
--
-- NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
--
-- if (dev->addr_len)
-- NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
--
-- NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
-- if (dev->ifindex != dev->iflink)
-- NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
--
-- protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
-- if (protoinfo == NULL)
-- goto nla_put_failure;
--
-- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
--
-- ci.max_reasm_len = IPV6_MAXPLEN;
-- ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
-- + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-- ci.reachable_time = idev->nd_parms->reachable_time;
-- ci.retrans_time = idev->nd_parms->retrans_time;
-- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
--
-- nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
-- if (nla == NULL)
-- goto nla_put_failure;
-- ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
--
-- /* XXX - MC not implemented */
--
-- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
-- if (nla == NULL)
-- goto nla_put_failure;
-- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
--
-- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
-- if (nla == NULL)
-- goto nla_put_failure;
-- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
--
-- nla_nest_end(skb, protoinfo);
-- return nlmsg_end(skb, nlh);
--
--nla_put_failure:
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
--}
--
--static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- int idx, err;
-- int s_idx = cb->args[0];
-- struct net_device *dev;
-- struct inet6_dev *idev;
--
-- read_lock(&dev_base_lock);
-- idx = 0;
++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ }
+
+ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
+@@ -3612,19 +3643,22 @@
+
+ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int idx, err;
+ int s_idx = cb->args[0];
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
+
++ if (net != &init_net)
++ return 0;
+ /* FIXME: maybe disable ipv6 on non v6 guests?
+ if (skb->sk && skb->sk->sk_vx_info)
+ return skb->len; */
+
+ read_lock(&dev_base_lock);
+ idx = 0;
- for_each_netdev(dev) {
-- if (idx < s_idx)
-- goto cont;
-- if ((idev = in6_dev_get(dev)) == NULL)
-- goto cont;
-- err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
-- in6_dev_put(idev);
-- if (err <= 0)
-- break;
--cont:
-- idx++;
-- }
-- read_unlock(&dev_base_lock);
-- cb->args[0] = idx;
--
-- return skb->len;
--}
--
--void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
--{
-- struct sk_buff *skb;
-- int err = -ENOBUFS;
--
-- skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
-- if (skb == NULL)
-- goto errout;
--
-- err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout;
-- }
++ for_each_netdev(&init_net, dev) {
+ if (idx < s_idx)
+ goto cont;
+ if (!v6_dev_in_nx_info(dev, nxi))
+@@ -3661,10 +3695,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
--errout:
-- if (err < 0)
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
--}
--
--static inline size_t inet6_prefix_nlmsg_size(void)
--{
-- return NLMSG_ALIGN(sizeof(struct prefixmsg))
-- + nla_total_size(sizeof(struct in6_addr))
-- + nla_total_size(sizeof(struct prefix_cacheinfo));
--}
--
--static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
-- struct prefix_info *pinfo, u32 pid, u32 seq,
-- int event, unsigned int flags)
--{
-- struct prefixmsg *pmsg;
-- struct nlmsghdr *nlh;
-- struct prefix_cacheinfo ci;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- pmsg = nlmsg_data(nlh);
-- pmsg->prefix_family = AF_INET6;
-- pmsg->prefix_pad1 = 0;
-- pmsg->prefix_pad2 = 0;
-- pmsg->prefix_ifindex = idev->dev->ifindex;
-- pmsg->prefix_len = pinfo->prefix_len;
-- pmsg->prefix_type = pinfo->type;
-- pmsg->prefix_pad3 = 0;
-- pmsg->prefix_flags = 0;
-- if (pinfo->onlink)
-- pmsg->prefix_flags |= IF_PREFIX_ONLINK;
-- if (pinfo->autoconf)
-- pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
--
-- NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
--
-- ci.preferred_time = ntohl(pinfo->prefered);
-- ci.valid_time = ntohl(pinfo->valid);
-- NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
--
-- return nlmsg_end(skb, nlh);
--
--nla_put_failure:
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
--}
--
--static void inet6_prefix_notify(int event, struct inet6_dev *idev,
-- struct prefix_info *pinfo)
--{
-- struct sk_buff *skb;
-- int err = -ENOBUFS;
--
-- skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
-- if (skb == NULL)
-- goto errout;
--
-- err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout;
-- }
++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ }
+
+ static inline size_t inet6_prefix_nlmsg_size(void)
+@@ -3730,10 +3764,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
--errout:
-- if (err < 0)
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
--}
--
--static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
--{
-- inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
--
-- switch (event) {
-- case RTM_NEWADDR:
-- /*
-- * If the address was optimistic
-- * we inserted the route at the start of
-- * our DAD process, so we don't need
-- * to do it again
-- */
-- if (!(ifp->rt->rt6i_node))
-- ip6_ins_rt(ifp->rt);
-- if (ifp->idev->cnf.forwarding)
-- addrconf_join_anycast(ifp);
-- break;
-- case RTM_DELADDR:
-- if (ifp->idev->cnf.forwarding)
-- addrconf_leave_anycast(ifp);
-- addrconf_leave_solict(ifp->idev, &ifp->addr);
-- dst_hold(&ifp->rt->u.dst);
-- if (ip6_del_rt(ifp->rt))
-- dst_free(&ifp->rt->u.dst);
-- break;
-- }
--}
--
--static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
--{
-- rcu_read_lock_bh();
-- if (likely(ifp->idev->dead == 0))
-- __ipv6_ifa_notify(event, ifp);
-- rcu_read_unlock_bh();
--}
--
--#ifdef CONFIG_SYSCTL
--
--static
--int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
-- void __user *buffer, size_t *lenp, loff_t *ppos)
--{
-- int *valp = ctl->data;
-- int val = *valp;
-- int ret;
--
-- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
--
-- if (write && valp != &ipv6_devconf_dflt.forwarding) {
-- if (valp != &ipv6_devconf.forwarding) {
-- if ((!*valp) ^ (!val)) {
-- struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
-- if (idev == NULL)
-- return ret;
-- dev_forward_change(idev);
-- }
-- } else {
-- ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
-- addrconf_forward_change();
-- }
-- if (*valp)
-- rt6_purge_dflt_routers();
-- }
--
-- return ret;
--}
--
--static int addrconf_sysctl_forward_strategy(ctl_table *table,
-- int __user *name, int nlen,
-- void __user *oldval,
-- size_t __user *oldlenp,
-- void __user *newval, size_t newlen)
--{
-- int *valp = table->data;
-- int new;
--
-- if (!newval || !newlen)
-- return 0;
-- if (newlen != sizeof(int))
-- return -EINVAL;
-- if (get_user(new, (int __user *)newval))
-- return -EFAULT;
-- if (new == *valp)
-- return 0;
-- if (oldval && oldlenp) {
-- size_t len;
-- if (get_user(len, oldlenp))
-- return -EFAULT;
-- if (len) {
-- if (len > table->maxlen)
-- len = table->maxlen;
-- if (copy_to_user(oldval, valp, len))
-- return -EFAULT;
-- if (put_user(len, oldlenp))
-- return -EFAULT;
-- }
-- }
--
-- if (valp != &ipv6_devconf_dflt.forwarding) {
-- if (valp != &ipv6_devconf.forwarding) {
-- struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
-- int changed;
-- if (unlikely(idev == NULL))
-- return -ENODEV;
-- changed = (!*valp) ^ (!new);
-- *valp = new;
-- if (changed)
-- dev_forward_change(idev);
-- } else {
-- *valp = new;
-- addrconf_forward_change();
-- }
--
-- if (*valp)
-- rt6_purge_dflt_routers();
-- } else
-- *valp = new;
--
-- return 1;
--}
--
--static struct addrconf_sysctl_table
--{
-- struct ctl_table_header *sysctl_header;
-- ctl_table addrconf_vars[__NET_IPV6_MAX];
-- ctl_table addrconf_dev[2];
-- ctl_table addrconf_conf_dir[2];
-- ctl_table addrconf_proto_dir[2];
-- ctl_table addrconf_root_dir[2];
--} addrconf_sysctl __read_mostly = {
-- .sysctl_header = NULL,
-- .addrconf_vars = {
-- {
-- .ctl_name = NET_IPV6_FORWARDING,
-- .procname = "forwarding",
-- .data = &ipv6_devconf.forwarding,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &addrconf_sysctl_forward,
-- .strategy = &addrconf_sysctl_forward_strategy,
-- },
-- {
-- .ctl_name = NET_IPV6_HOP_LIMIT,
-- .procname = "hop_limit",
-- .data = &ipv6_devconf.hop_limit,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_MTU,
-- .procname = "mtu",
-- .data = &ipv6_devconf.mtu6,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA,
-- .procname = "accept_ra",
-- .data = &ipv6_devconf.accept_ra,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_REDIRECTS,
-- .procname = "accept_redirects",
-- .data = &ipv6_devconf.accept_redirects,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_AUTOCONF,
-- .procname = "autoconf",
-- .data = &ipv6_devconf.autoconf,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_DAD_TRANSMITS,
-- .procname = "dad_transmits",
-- .data = &ipv6_devconf.dad_transmits,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_SOLICITS,
-- .procname = "router_solicitations",
-- .data = &ipv6_devconf.rtr_solicits,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL,
-- .procname = "router_solicitation_interval",
-- .data = &ipv6_devconf.rtr_solicit_interval,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY,
-- .procname = "router_solicitation_delay",
-- .data = &ipv6_devconf.rtr_solicit_delay,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies,
-- },
-- {
-- .ctl_name = NET_IPV6_FORCE_MLD_VERSION,
-- .procname = "force_mld_version",
-- .data = &ipv6_devconf.force_mld_version,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#ifdef CONFIG_IPV6_PRIVACY
-- {
-- .ctl_name = NET_IPV6_USE_TEMPADDR,
-- .procname = "use_tempaddr",
-- .data = &ipv6_devconf.use_tempaddr,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_TEMP_VALID_LFT,
-- .procname = "temp_valid_lft",
-- .data = &ipv6_devconf.temp_valid_lft,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_TEMP_PREFERED_LFT,
-- .procname = "temp_prefered_lft",
-- .data = &ipv6_devconf.temp_prefered_lft,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_REGEN_MAX_RETRY,
-- .procname = "regen_max_retry",
-- .data = &ipv6_devconf.regen_max_retry,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR,
-- .procname = "max_desync_factor",
-- .data = &ipv6_devconf.max_desync_factor,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#endif
-- {
-- .ctl_name = NET_IPV6_MAX_ADDRESSES,
-- .procname = "max_addresses",
-- .data = &ipv6_devconf.max_addresses,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR,
-- .procname = "accept_ra_defrtr",
-- .data = &ipv6_devconf.accept_ra_defrtr,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_PINFO,
-- .procname = "accept_ra_pinfo",
-- .data = &ipv6_devconf.accept_ra_pinfo,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF,
-- .procname = "accept_ra_rtr_pref",
-- .data = &ipv6_devconf.accept_ra_rtr_pref,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL,
-- .procname = "router_probe_interval",
-- .data = &ipv6_devconf.rtr_probe_interval,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies,
-- },
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN,
-- .procname = "accept_ra_rt_info_max_plen",
-- .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#endif
--#endif
-- {
-- .ctl_name = NET_IPV6_PROXY_NDP,
-- .procname = "proxy_ndp",
-- .data = &ipv6_devconf.proxy_ndp,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE,
-- .procname = "accept_source_route",
-- .data = &ipv6_devconf.accept_source_route,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- {
-- .ctl_name = CTL_UNNUMBERED,
-- .procname = "optimistic_dad",
-- .data = &ipv6_devconf.optimistic_dad,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
--
-- },
--#endif
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_dev = {
-- {
-- .ctl_name = NET_PROTO_CONF_ALL,
-- .procname = "all",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_vars,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_conf_dir = {
-- {
-- .ctl_name = NET_IPV6_CONF,
-- .procname = "conf",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_dev,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_proto_dir = {
-- {
-- .ctl_name = NET_IPV6,
-- .procname = "ipv6",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_conf_dir,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_root_dir = {
-- {
-- .ctl_name = CTL_NET,
-- .procname = "net",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_proto_dir,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
--};
--
--static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
--{
-- int i;
-- struct net_device *dev = idev ? idev->dev : NULL;
-- struct addrconf_sysctl_table *t;
-- char *dev_name = NULL;
--
-- t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
-- if (t == NULL)
-- return;
-- for (i=0; t->addrconf_vars[i].data; i++) {
-- t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
-- t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
-- }
-- if (dev) {
-- dev_name = dev->name;
-- t->addrconf_dev[0].ctl_name = dev->ifindex;
-- } else {
-- dev_name = "default";
-- t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-- }
--
-- /*
-- * Make a copy of dev_name, because '.procname' is regarded as const
-- * by sysctl and we wouldn't want anyone to change it under our feet
-- * (see SIOCSIFNAME).
-- */
-- dev_name = kstrdup(dev_name, GFP_KERNEL);
-- if (!dev_name)
-- goto free;
--
-- t->addrconf_dev[0].procname = dev_name;
--
-- t->addrconf_dev[0].child = t->addrconf_vars;
-- t->addrconf_conf_dir[0].child = t->addrconf_dev;
-- t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
-- t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
--
-- t->sysctl_header = register_sysctl_table(t->addrconf_root_dir);
-- if (t->sysctl_header == NULL)
-- goto free_procname;
-- else
-- p->sysctl = t;
-- return;
--
-- /* error path */
-- free_procname:
-- kfree(dev_name);
-- free:
-- kfree(t);
--
-- return;
--}
--
--static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
--{
-- if (p->sysctl) {
-- struct addrconf_sysctl_table *t = p->sysctl;
-- p->sysctl = NULL;
-- unregister_sysctl_table(t->sysctl_header);
-- kfree(t->addrconf_dev[0].procname);
-- kfree(t);
-- }
--}
--
--
--#endif
--
--/*
-- * Device notifier
-- */
--
--int register_inet6addr_notifier(struct notifier_block *nb)
--{
-- return atomic_notifier_chain_register(&inet6addr_chain, nb);
--}
--
--EXPORT_SYMBOL(register_inet6addr_notifier);
--
--int unregister_inet6addr_notifier(struct notifier_block *nb)
--{
-- return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
--}
--
--EXPORT_SYMBOL(unregister_inet6addr_notifier);
--
--/*
-- * Init / cleanup code
-- */
--
--int __init addrconf_init(void)
--{
-- int err = 0;
--
-- /* The addrconf netdev notifier requires that loopback_dev
-- * has it's ipv6 private information allocated and setup
-- * before it can bring up and give link-local addresses
-- * to other devices which are up.
-- *
-- * Unfortunately, loopback_dev is not necessarily the first
-- * entry in the global dev_base list of net devices. In fact,
-- * it is likely to be the very last entry on that list.
-- * So this causes the notifier registry below to try and
-- * give link-local addresses to all devices besides loopback_dev
-- * first, then loopback_dev, which cases all the non-loopback_dev
-- * devices to fail to get a link-local address.
-- *
-- * So, as a temporary fix, allocate the ipv6 structure for
-- * loopback_dev first by hand.
-- * Longer term, all of the dependencies ipv6 has upon the loopback
-- * device and it being up should be removed.
-- */
-- rtnl_lock();
++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
+ }
+
+ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+@@ -4244,16 +4278,16 @@
+ * device and it being up should be removed.
+ */
+ rtnl_lock();
- if (!ipv6_add_dev(&loopback_dev))
-- err = -ENOMEM;
-- rtnl_unlock();
-- if (err)
-- return err;
--
++ if (!ipv6_add_dev(&init_net.loopback_dev))
+ err = -ENOMEM;
+ rtnl_unlock();
+ if (err)
+ return err;
+
- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
--#ifdef CONFIG_IPV6_MULTIPLE_TABLES
++ ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev);
- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev);
--#endif
--
-- register_netdevice_notifier(&ipv6_dev_notf);
--
-- addrconf_verify(0);
--
-- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
-- if (err < 0)
-- goto errout;
--
-- /* Only the first call to __rtnl_register can fail */
-- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
-- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
-- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
-- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
-- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
--
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl.sysctl_header =
-- register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
-- addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
--#endif
--
-- return 0;
--errout:
-- unregister_netdevice_notifier(&ipv6_dev_notf);
--
-- return err;
--}
--
--void __exit addrconf_cleanup(void)
--{
-- struct net_device *dev;
-- struct inet6_dev *idev;
-- struct inet6_ifaddr *ifa;
-- int i;
--
-- unregister_netdevice_notifier(&ipv6_dev_notf);
--
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-- addrconf_sysctl_unregister(&ipv6_devconf);
--#endif
--
-- rtnl_lock();
--
-- /*
-- * clean dev list.
-- */
--
++ ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
++ ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
+ #endif
+
+ register_netdevice_notifier(&ipv6_dev_notf);
+@@ -4304,12 +4338,12 @@
+ * clean dev list.
+ */
+
- for_each_netdev(dev) {
-- if ((idev = __in6_dev_get(dev)) == NULL)
-- continue;
-- addrconf_ifdown(dev, 1);
-- }
++ for_each_netdev(&init_net, dev) {
+ if ((idev = __in6_dev_get(dev)) == NULL)
+ continue;
+ addrconf_ifdown(dev, 1);
+ }
- addrconf_ifdown(&loopback_dev, 2);
--
-- /*
-- * Check hash table.
-- */
--
-- write_lock_bh(&addrconf_hash_lock);
-- for (i=0; i < IN6_ADDR_HSIZE; i++) {
-- for (ifa=inet6_addr_lst[i]; ifa; ) {
-- struct inet6_ifaddr *bifa;
--
-- bifa = ifa;
-- ifa = ifa->lst_next;
-- printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
-- /* Do not free it; something is wrong.
-- Now we can investigate it with debugger.
-- */
-- }
-- }
-- write_unlock_bh(&addrconf_hash_lock);
--
-- del_timer(&addr_chk_timer);
--
-- rtnl_unlock();
--
--#ifdef CONFIG_PROC_FS
++ addrconf_ifdown(&init_net.loopback_dev, 2);
+
+ /*
+ * Check hash table.
+@@ -4335,6 +4369,6 @@
+ rtnl_unlock();
+
+ #ifdef CONFIG_PROC_FS
- proc_net_remove("if_inet6");
--#endif
--}
++ proc_net_remove(&init_net, "if_inet6");
+ #endif
+ }
diff -Nurb linux-2.6.22-570/net/ipv6/af_inet6.c linux-2.6.22-590/net/ipv6/af_inet6.c
--- linux-2.6.22-570/net/ipv6/af_inet6.c 2008-03-20 13:25:46.000000000 -0400
+++ linux-2.6.22-590/net/ipv6/af_inet6.c 2008-03-20 13:28:03.000000000 -0400
}
*/
connection_sk = (*pskb)->sk;
-diff -Nurb linux-2.6.22-570/net/netfilter/xt_MARK.c.orig linux-2.6.22-590/net/netfilter/xt_MARK.c.orig
---- linux-2.6.22-570/net/netfilter/xt_MARK.c.orig 2008-03-20 13:25:49.000000000 -0400
-+++ linux-2.6.22-590/net/netfilter/xt_MARK.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,284 +0,0 @@
--/* This is a module which is used for setting the NFMARK field of an skb. */
--
--/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
-- *
-- * This program is free software; you can redistribute it and/or modify
-- * it under the terms of the GNU General Public License version 2 as
-- * published by the Free Software Foundation.
-- *
-- */
--
--#include <linux/module.h>
--#include <linux/version.h>
--#include <linux/skbuff.h>
--#include <linux/ip.h>
--#include <net/checksum.h>
--#include <net/route.h>
--#include <net/inet_hashtables.h>
--
--#include <net/netfilter/nf_conntrack.h>
--#include <linux/netfilter/x_tables.h>
--#include <linux/netfilter/xt_MARK.h>
--
--MODULE_LICENSE("GPL");
--MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
--MODULE_DESCRIPTION("ip[6]tables MARK modification module");
--MODULE_ALIAS("ipt_MARK");
--MODULE_ALIAS("ip6t_MARK");
--
--static inline u_int16_t
--get_dst_port(struct nf_conntrack_tuple *tuple)
--{
-- switch (tuple->dst.protonum) {
-- case IPPROTO_GRE:
-- /* XXX Truncate 32-bit GRE key to 16 bits */
--#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
-- return tuple->dst.u.gre.key;
--#else
-- return htons(ntohl(tuple->dst.u.gre.key));
--#endif
-- case IPPROTO_ICMP:
-- /* Bind on ICMP echo ID */
-- return tuple->src.u.icmp.id;
-- case IPPROTO_TCP:
-- return tuple->dst.u.tcp.port;
-- case IPPROTO_UDP:
-- return tuple->dst.u.udp.port;
-- default:
-- return tuple->dst.u.all;
-- }
--}
--
--static inline u_int16_t
--get_src_port(struct nf_conntrack_tuple *tuple)
--{
-- switch (tuple->dst.protonum) {
-- case IPPROTO_GRE:
-- /* XXX Truncate 32-bit GRE key to 16 bits */
-- return htons(ntohl(tuple->src.u.gre.key));
-- case IPPROTO_ICMP:
-- /* Bind on ICMP echo ID */
-- return tuple->src.u.icmp.id;
-- case IPPROTO_TCP:
-- return tuple->src.u.tcp.port;
-- case IPPROTO_UDP:
-- return tuple->src.u.udp.port;
-- default:
-- return tuple->src.u.all;
-- }
--}
--
--static unsigned int
--target_v0(struct sk_buff **pskb,
-- const struct net_device *in,
-- const struct net_device *out,
-- unsigned int hooknum,
-- const struct xt_target *target,
-- const void *targinfo)
--{
-- const struct xt_mark_target_info *markinfo = targinfo;
--
-- (*pskb)->mark = markinfo->mark;
-- return XT_CONTINUE;
--}
--
--static unsigned int
--target_v1(struct sk_buff **pskb,
-- const struct net_device *in,
-- const struct net_device *out,
-- unsigned int hooknum,
-- const struct xt_target *target,
-- const void *targinfo)
--{
-- const struct xt_mark_target_info_v1 *markinfo = targinfo;
-- int mark = -1;
--
-- switch (markinfo->mode) {
-- case XT_MARK_SET:
-- mark = markinfo->mark;
-- break;
--
-- case XT_MARK_AND:
-- mark = (*pskb)->mark & markinfo->mark;
-- break;
--
-- case XT_MARK_OR:
-- mark = (*pskb)->mark | markinfo->mark;
-- break;
--
-- case XT_MARK_COPYXID: {
-- enum ip_conntrack_info ctinfo;
-- struct sock *connection_sk=NULL;
-- int dif;
--
-- struct nf_conn *ct = nf_ct_get((*pskb), &ctinfo);
-- extern struct inet_hashinfo tcp_hashinfo;
-- enum ip_conntrack_dir dir;
-- if (!ct)
-- break;
--
-- dir = CTINFO2DIR(ctinfo);
-- u_int32_t src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
-- u_int16_t src_port = get_src_port(&ct->tuplehash[dir].tuple);
-- u_int16_t proto = ct->tuplehash[dir].tuple.dst.protonum;
--
-- u_int32_t ip;
-- u_int16_t port;
--
-- dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
-- ip = ct->tuplehash[dir].tuple.dst.u3.ip;
-- port = get_dst_port(&ct->tuplehash[dir].tuple);
--
-- if (proto == 1) {
-- if (((*pskb)->mark!=-1) && (*pskb)->mark)
-- ct->xid[0]=(*pskb)->mark;
-- if (ct->xid[0])
-- mark = ct->xid[0];
-- printk(KERN_CRIT "%d %d\n",ct->xid[0],(*pskb)->mark);
--
-- }
-- else if (proto == 6) {
-- if ((*pskb)->sk)
-- connection_sk = (*pskb)->sk;
-- else {
-- connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
-- }
--
-- if (connection_sk) {
-- connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir];
-- ct->xid[!dir]=connection_sk->sk_xid;
-- if (connection_sk->sk_xid != 0)
-- mark = connection_sk->sk_xid;
-- if (connection_sk != (*pskb)->sk)
-- sock_put(connection_sk);
-- }
-- break;
-- }
-- }
-- }
--
-- if (mark != -1)
-- (*pskb)->mark = mark;
-- return XT_CONTINUE;
--}
--
--
--static int
--checkentry_v0(const char *tablename,
-- const void *entry,
-- const struct xt_target *target,
-- void *targinfo,
-- unsigned int hook_mask)
--{
-- struct xt_mark_target_info *markinfo = targinfo;
--
-- if (markinfo->mark > 0xffffffff) {
-- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
-- return 0;
-- }
-- return 1;
--}
--
--static int
--checkentry_v1(const char *tablename,
-- const void *entry,
-- const struct xt_target *target,
-- void *targinfo,
-- unsigned int hook_mask)
--{
-- struct xt_mark_target_info_v1 *markinfo = targinfo;
--
-- if (markinfo->mode != XT_MARK_SET
-- && markinfo->mode != XT_MARK_AND
-- && markinfo->mode != XT_MARK_OR
-- && markinfo->mode != XT_MARK_COPYXID) {
-- printk(KERN_WARNING "MARK: unknown mode %u\n",
-- markinfo->mode);
-- return 0;
-- }
-- if (markinfo->mark > 0xffffffff) {
-- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
-- return 0;
-- }
-- return 1;
--}
--
--#ifdef CONFIG_COMPAT
--struct compat_xt_mark_target_info_v1 {
-- compat_ulong_t mark;
-- u_int8_t mode;
-- u_int8_t __pad1;
-- u_int16_t __pad2;
--};
--
--static void compat_from_user_v1(void *dst, void *src)
--{
-- struct compat_xt_mark_target_info_v1 *cm = src;
-- struct xt_mark_target_info_v1 m = {
-- .mark = cm->mark,
-- .mode = cm->mode,
-- };
-- memcpy(dst, &m, sizeof(m));
--}
--
--static int compat_to_user_v1(void __user *dst, void *src)
--{
-- struct xt_mark_target_info_v1 *m = src;
-- struct compat_xt_mark_target_info_v1 cm = {
-- .mark = m->mark,
-- .mode = m->mode,
-- };
-- return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
--}
--#endif /* CONFIG_COMPAT */
--
--static struct xt_target xt_mark_target[] = {
-- {
-- .name = "MARK",
-- .family = AF_INET,
-- .revision = 0,
-- .checkentry = checkentry_v0,
-- .target = target_v0,
-- .targetsize = sizeof(struct xt_mark_target_info),
-- .table = "mangle",
-- .me = THIS_MODULE,
-- },
-- {
-- .name = "MARK",
-- .family = AF_INET,
-- .revision = 1,
-- .checkentry = checkentry_v1,
-- .target = target_v1,
-- .targetsize = sizeof(struct xt_mark_target_info_v1),
--#ifdef CONFIG_COMPAT
-- .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
-- .compat_from_user = compat_from_user_v1,
-- .compat_to_user = compat_to_user_v1,
--#endif
-- .table = "mangle",
-- .me = THIS_MODULE,
-- },
-- {
-- .name = "MARK",
-- .family = AF_INET6,
-- .revision = 0,
-- .checkentry = checkentry_v0,
-- .target = target_v0,
-- .targetsize = sizeof(struct xt_mark_target_info),
-- .table = "mangle",
-- .me = THIS_MODULE,
-- },
--};
--
--static int __init xt_mark_init(void)
--{
-- return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
--}
--
--static void __exit xt_mark_fini(void)
--{
-- xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
--}
--
--module_init(xt_mark_init);
--module_exit(xt_mark_fini);
diff -Nurb linux-2.6.22-570/net/netfilter/xt_hashlimit.c linux-2.6.22-590/net/netfilter/xt_hashlimit.c
--- linux-2.6.22-570/net/netfilter/xt_hashlimit.c 2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/net/netfilter/xt_hashlimit.c 2008-03-20 13:28:08.000000000 -0400
return 0;
}
-diff -Nurb linux-2.6.22-570/net/socket.c.orig linux-2.6.22-590/net/socket.c.orig
---- linux-2.6.22-570/net/socket.c.orig 2008-03-20 13:25:40.000000000 -0400
-+++ linux-2.6.22-590/net/socket.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,2344 +0,0 @@
--/*
-- * NET An implementation of the SOCKET network access protocol.
-- *
-- * Version: @(#)socket.c 1.1.93 18/02/95
-- *
-- * Authors: Orest Zborowski, <obz@Kodak.COM>
-- * Ross Biro
-- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
-- *
-- * Fixes:
-- * Anonymous : NOTSOCK/BADF cleanup. Error fix in
-- * shutdown()
-- * Alan Cox : verify_area() fixes
-- * Alan Cox : Removed DDI
-- * Jonathan Kamens : SOCK_DGRAM reconnect bug
-- * Alan Cox : Moved a load of checks to the very
-- * top level.
-- * Alan Cox : Move address structures to/from user
-- * mode above the protocol layers.
-- * Rob Janssen : Allow 0 length sends.
-- * Alan Cox : Asynchronous I/O support (cribbed from the
-- * tty drivers).
-- * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
-- * Jeff Uphoff : Made max number of sockets command-line
-- * configurable.
-- * Matti Aarnio : Made the number of sockets dynamic,
-- * to be allocated when needed, and mr.
-- * Uphoff's max is used as max to be
-- * allowed to allocate.
-- * Linus : Argh. removed all the socket allocation
-- * altogether: it's in the inode now.
-- * Alan Cox : Made sock_alloc()/sock_release() public
-- * for NetROM and future kernel nfsd type
-- * stuff.
-- * Alan Cox : sendmsg/recvmsg basics.
-- * Tom Dyas : Export net symbols.
-- * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
-- * Alan Cox : Added thread locking to sys_* calls
-- * for sockets. May have errors at the
-- * moment.
-- * Kevin Buhr : Fixed the dumb errors in the above.
-- * Andi Kleen : Some small cleanups, optimizations,
-- * and fixed a copy_from_user() bug.
-- * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
-- * Tigran Aivazian : Made listen(2) backlog sanity checks
-- * protocol-independent
-- *
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- *
-- *
-- * This module is effectively the top level interface to the BSD socket
-- * paradigm.
-- *
-- * Based upon Swansea University Computer Society NET3.039
-- */
--
--#include <linux/mm.h>
--#include <linux/socket.h>
--#include <linux/file.h>
--#include <linux/net.h>
--#include <linux/interrupt.h>
--#include <linux/rcupdate.h>
--#include <linux/netdevice.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--#include <linux/mutex.h>
--#include <linux/wanrouter.h>
--#include <linux/if_bridge.h>
--#include <linux/if_frad.h>
--#include <linux/if_vlan.h>
--#include <linux/init.h>
--#include <linux/poll.h>
--#include <linux/cache.h>
--#include <linux/module.h>
--#include <linux/highmem.h>
--#include <linux/mount.h>
--#include <linux/security.h>
--#include <linux/syscalls.h>
--#include <linux/compat.h>
--#include <linux/kmod.h>
--#include <linux/audit.h>
--#include <linux/wireless.h>
--
--#include <asm/uaccess.h>
--#include <asm/unistd.h>
--
--#include <net/compat.h>
--
--#include <net/sock.h>
--#include <linux/netfilter.h>
--
--static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
--static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
-- unsigned long nr_segs, loff_t pos);
--static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
-- unsigned long nr_segs, loff_t pos);
--static int sock_mmap(struct file *file, struct vm_area_struct *vma);
--
--static int sock_close(struct inode *inode, struct file *file);
--static unsigned int sock_poll(struct file *file,
-- struct poll_table_struct *wait);
--static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
--#ifdef CONFIG_COMPAT
--static long compat_sock_ioctl(struct file *file,
-- unsigned int cmd, unsigned long arg);
--#endif
--static int sock_fasync(int fd, struct file *filp, int on);
--static ssize_t sock_sendpage(struct file *file, struct page *page,
-- int offset, size_t size, loff_t *ppos, int more);
--
--/*
-- * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
-- * in the operation structures but are done directly via the socketcall() multiplexor.
-- */
--
--static const struct file_operations socket_file_ops = {
-- .owner = THIS_MODULE,
-- .llseek = no_llseek,
-- .aio_read = sock_aio_read,
-- .aio_write = sock_aio_write,
-- .poll = sock_poll,
-- .unlocked_ioctl = sock_ioctl,
--#ifdef CONFIG_COMPAT
-- .compat_ioctl = compat_sock_ioctl,
--#endif
-- .mmap = sock_mmap,
-- .open = sock_no_open, /* special open code to disallow open via /proc */
-- .release = sock_close,
-- .fasync = sock_fasync,
-- .sendpage = sock_sendpage,
-- .splice_write = generic_splice_sendpage,
--};
--
--/*
-- * The protocol list. Each protocol is registered in here.
-- */
--
--static DEFINE_SPINLOCK(net_family_lock);
--static const struct net_proto_family *net_families[NPROTO] __read_mostly;
--
--/*
-- * Statistics counters of the socket lists
-- */
--
--static DEFINE_PER_CPU(int, sockets_in_use) = 0;
--
--/*
-- * Support routines.
-- * Move socket addresses back and forth across the kernel/user
-- * divide and look after the messy bits.
-- */
--
--#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
-- 16 for IP, 16 for IPX,
-- 24 for IPv6,
-- about 80 for AX.25
-- must be at least one bigger than
-- the AF_UNIX size (see net/unix/af_unix.c
-- :unix_mkname()).
-- */
--
--/**
-- * move_addr_to_kernel - copy a socket address into kernel space
-- * @uaddr: Address in user space
-- * @kaddr: Address in kernel space
-- * @ulen: Length in user space
-- *
-- * The address is copied into kernel space. If the provided address is
-- * too long an error code of -EINVAL is returned. If the copy gives
-- * invalid addresses -EFAULT is returned. On a success 0 is returned.
-- */
--
--int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
--{
-- if (ulen < 0 || ulen > MAX_SOCK_ADDR)
-- return -EINVAL;
-- if (ulen == 0)
-- return 0;
-- if (copy_from_user(kaddr, uaddr, ulen))
-- return -EFAULT;
-- return audit_sockaddr(ulen, kaddr);
--}
--
--/**
-- * move_addr_to_user - copy an address to user space
-- * @kaddr: kernel space address
-- * @klen: length of address in kernel
-- * @uaddr: user space address
-- * @ulen: pointer to user length field
-- *
-- * The value pointed to by ulen on entry is the buffer length available.
-- * This is overwritten with the buffer space used. -EINVAL is returned
-- * if an overlong buffer is specified or a negative buffer size. -EFAULT
-- * is returned if either the buffer or the length field are not
-- * accessible.
-- * After copying the data up to the limit the user specifies, the true
-- * length of the data is written over the length limit the user
-- * specified. Zero is returned for a success.
-- */
--
--int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
-- int __user *ulen)
--{
-- int err;
-- int len;
--
-- err = get_user(len, ulen);
-- if (err)
-- return err;
-- if (len > klen)
-- len = klen;
-- if (len < 0 || len > MAX_SOCK_ADDR)
-- return -EINVAL;
-- if (len) {
-- if (audit_sockaddr(klen, kaddr))
-- return -ENOMEM;
-- if (copy_to_user(uaddr, kaddr, len))
-- return -EFAULT;
-- }
-- /*
-- * "fromlen shall refer to the value before truncation.."
-- * 1003.1g
-- */
-- return __put_user(klen, ulen);
--}
--
--#define SOCKFS_MAGIC 0x534F434B
--
--static struct kmem_cache *sock_inode_cachep __read_mostly;
--
--static struct inode *sock_alloc_inode(struct super_block *sb)
--{
-- struct socket_alloc *ei;
--
-- ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
-- if (!ei)
-- return NULL;
-- init_waitqueue_head(&ei->socket.wait);
--
-- ei->socket.fasync_list = NULL;
-- ei->socket.state = SS_UNCONNECTED;
-- ei->socket.flags = 0;
-- ei->socket.ops = NULL;
-- ei->socket.sk = NULL;
-- ei->socket.file = NULL;
--
-- return &ei->vfs_inode;
--}
--
--static void sock_destroy_inode(struct inode *inode)
--{
-- kmem_cache_free(sock_inode_cachep,
-- container_of(inode, struct socket_alloc, vfs_inode));
--}
--
--static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
--{
-- struct socket_alloc *ei = (struct socket_alloc *)foo;
--
-- inode_init_once(&ei->vfs_inode);
--}
--
--static int init_inodecache(void)
--{
-- sock_inode_cachep = kmem_cache_create("sock_inode_cache",
-- sizeof(struct socket_alloc),
-- 0,
-- (SLAB_HWCACHE_ALIGN |
-- SLAB_RECLAIM_ACCOUNT |
-- SLAB_MEM_SPREAD),
-- init_once,
-- NULL);
-- if (sock_inode_cachep == NULL)
-- return -ENOMEM;
-- return 0;
--}
--
--static struct super_operations sockfs_ops = {
-- .alloc_inode = sock_alloc_inode,
-- .destroy_inode =sock_destroy_inode,
-- .statfs = simple_statfs,
--};
--
--static int sockfs_get_sb(struct file_system_type *fs_type,
-- int flags, const char *dev_name, void *data,
-- struct vfsmount *mnt)
--{
-- return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
-- mnt);
--}
--
--static struct vfsmount *sock_mnt __read_mostly;
--
--static struct file_system_type sock_fs_type = {
-- .name = "sockfs",
-- .get_sb = sockfs_get_sb,
-- .kill_sb = kill_anon_super,
--};
--
--static int sockfs_delete_dentry(struct dentry *dentry)
--{
-- /*
-- * At creation time, we pretended this dentry was hashed
-- * (by clearing DCACHE_UNHASHED bit in d_flags)
-- * At delete time, we restore the truth : not hashed.
-- * (so that dput() can proceed correctly)
-- */
-- dentry->d_flags |= DCACHE_UNHASHED;
-- return 0;
--}
--
--/*
-- * sockfs_dname() is called from d_path().
-- */
--static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
--{
-- return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
-- dentry->d_inode->i_ino);
--}
--
--static struct dentry_operations sockfs_dentry_operations = {
-- .d_delete = sockfs_delete_dentry,
-- .d_dname = sockfs_dname,
--};
--
--/*
-- * Obtains the first available file descriptor and sets it up for use.
-- *
-- * These functions create file structures and maps them to fd space
-- * of the current process. On success it returns file descriptor
-- * and file struct implicitly stored in sock->file.
-- * Note that another thread may close file descriptor before we return
-- * from this function. We use the fact that now we do not refer
-- * to socket after mapping. If one day we will need it, this
-- * function will increment ref. count on file by 1.
-- *
-- * In any case returned fd MAY BE not valid!
-- * This race condition is unavoidable
-- * with shared fd spaces, we cannot solve it inside kernel,
-- * but we take care of internal coherence yet.
-- */
--
--static int sock_alloc_fd(struct file **filep)
--{
-- int fd;
--
-- fd = get_unused_fd();
-- if (likely(fd >= 0)) {
-- struct file *file = get_empty_filp();
--
-- *filep = file;
-- if (unlikely(!file)) {
-- put_unused_fd(fd);
-- return -ENFILE;
-- }
-- } else
-- *filep = NULL;
-- return fd;
--}
--
--static int sock_attach_fd(struct socket *sock, struct file *file)
--{
-- struct qstr name = { .name = "" };
--
-- file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
-- if (unlikely(!file->f_path.dentry))
-- return -ENOMEM;
--
-- file->f_path.dentry->d_op = &sockfs_dentry_operations;
-- /*
-- * We dont want to push this dentry into global dentry hash table.
-- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
-- * This permits a working /proc/$pid/fd/XXX on sockets
-- */
-- file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
-- d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
-- file->f_path.mnt = mntget(sock_mnt);
-- file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
--
-- sock->file = file;
-- file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
-- file->f_mode = FMODE_READ | FMODE_WRITE;
-- file->f_flags = O_RDWR;
-- file->f_pos = 0;
-- file->private_data = sock;
--
-- return 0;
--}
--
--int sock_map_fd(struct socket *sock)
--{
-- struct file *newfile;
-- int fd = sock_alloc_fd(&newfile);
--
-- if (likely(fd >= 0)) {
-- int err = sock_attach_fd(sock, newfile);
--
-- if (unlikely(err < 0)) {
-- put_filp(newfile);
-- put_unused_fd(fd);
-- return err;
-- }
-- fd_install(fd, newfile);
-- }
-- return fd;
--}
--
--static struct socket *sock_from_file(struct file *file, int *err)
--{
-- if (file->f_op == &socket_file_ops)
-- return file->private_data; /* set in sock_map_fd */
--
-- *err = -ENOTSOCK;
-- return NULL;
--}
--
--/**
-- * sockfd_lookup - Go from a file number to its socket slot
-- * @fd: file handle
-- * @err: pointer to an error code return
-- *
-- * The file handle passed in is locked and the socket it is bound
-- * too is returned. If an error occurs the err pointer is overwritten
-- * with a negative errno code and NULL is returned. The function checks
-- * for both invalid handles and passing a handle which is not a socket.
-- *
-- * On a success the socket object pointer is returned.
-- */
--
--struct socket *sockfd_lookup(int fd, int *err)
--{
-- struct file *file;
-- struct socket *sock;
--
-- file = fget(fd);
-- if (!file) {
-- *err = -EBADF;
-- return NULL;
-- }
--
-- sock = sock_from_file(file, err);
-- if (!sock)
-- fput(file);
-- return sock;
--}
--
--static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
--{
-- struct file *file;
-- struct socket *sock;
--
-- *err = -EBADF;
-- file = fget_light(fd, fput_needed);
-- if (file) {
-- sock = sock_from_file(file, err);
-- if (sock)
-- return sock;
-- fput_light(file, *fput_needed);
-- }
-- return NULL;
--}
--
--/**
-- * sock_alloc - allocate a socket
-- *
-- * Allocate a new inode and socket object. The two are bound together
-- * and initialised. The socket is then returned. If we are out of inodes
-- * NULL is returned.
-- */
--
--static struct socket *sock_alloc(void)
--{
-- struct inode *inode;
-- struct socket *sock;
--
-- inode = new_inode(sock_mnt->mnt_sb);
-- if (!inode)
-- return NULL;
--
-- sock = SOCKET_I(inode);
--
-- inode->i_mode = S_IFSOCK | S_IRWXUGO;
-- inode->i_uid = current->fsuid;
-- inode->i_gid = current->fsgid;
--
-- get_cpu_var(sockets_in_use)++;
-- put_cpu_var(sockets_in_use);
-- return sock;
--}
--
--/*
-- * In theory you can't get an open on this inode, but /proc provides
-- * a back door. Remember to keep it shut otherwise you'll let the
-- * creepy crawlies in.
-- */
--
--static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
--{
-- return -ENXIO;
--}
--
--const struct file_operations bad_sock_fops = {
-- .owner = THIS_MODULE,
-- .open = sock_no_open,
--};
--
--/**
-- * sock_release - close a socket
-- * @sock: socket to close
-- *
-- * The socket is released from the protocol stack if it has a release
-- * callback, and the inode is then released if the socket is bound to
-- * an inode not a file.
-- */
--
--void sock_release(struct socket *sock)
--{
-- if (sock->ops) {
-- struct module *owner = sock->ops->owner;
--
-- sock->ops->release(sock);
-- sock->ops = NULL;
-- module_put(owner);
-- }
--
-- if (sock->fasync_list)
-- printk(KERN_ERR "sock_release: fasync list not empty!\n");
--
-- get_cpu_var(sockets_in_use)--;
-- put_cpu_var(sockets_in_use);
-- if (!sock->file) {
-- iput(SOCK_INODE(sock));
-- return;
-- }
-- sock->file = NULL;
--}
--
--static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-- struct msghdr *msg, size_t size)
--{
-- struct sock_iocb *si = kiocb_to_siocb(iocb);
-- int err;
--
-- si->sock = sock;
-- si->scm = NULL;
-- si->msg = msg;
-- si->size = size;
--
-- err = security_socket_sendmsg(sock, msg, size);
-- if (err)
-- return err;
--
-- return sock->ops->sendmsg(iocb, sock, msg, size);
--}
--
--int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
--{
-- struct kiocb iocb;
-- struct sock_iocb siocb;
-- int ret;
--
-- init_sync_kiocb(&iocb, NULL);
-- iocb.private = &siocb;
-- ret = __sock_sendmsg(&iocb, sock, msg, size);
-- if (-EIOCBQUEUED == ret)
-- ret = wait_on_sync_kiocb(&iocb);
-- return ret;
--}
--
--int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
-- struct kvec *vec, size_t num, size_t size)
--{
-- mm_segment_t oldfs = get_fs();
-- int result;
--
-- set_fs(KERNEL_DS);
-- /*
-- * the following is safe, since for compiler definitions of kvec and
-- * iovec are identical, yielding the same in-core layout and alignment
-- */
-- msg->msg_iov = (struct iovec *)vec;
-- msg->msg_iovlen = num;
-- result = sock_sendmsg(sock, msg, size);
-- set_fs(oldfs);
-- return result;
--}
--
--/*
-- * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
-- */
--void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
-- struct sk_buff *skb)
--{
-- ktime_t kt = skb->tstamp;
--
-- if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-- struct timeval tv;
-- /* Race occurred between timestamp enabling and packet
-- receiving. Fill in the current time for now. */
-- if (kt.tv64 == 0)
-- kt = ktime_get_real();
-- skb->tstamp = kt;
-- tv = ktime_to_timeval(kt);
-- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
-- } else {
-- struct timespec ts;
-- /* Race occurred between timestamp enabling and packet
-- receiving. Fill in the current time for now. */
-- if (kt.tv64 == 0)
-- kt = ktime_get_real();
-- skb->tstamp = kt;
-- ts = ktime_to_timespec(kt);
-- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
-- }
--}
--
--EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
--
--static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-- struct msghdr *msg, size_t size, int flags)
--{
-- int err;
-- struct sock_iocb *si = kiocb_to_siocb(iocb);
--
-- si->sock = sock;
-- si->scm = NULL;
-- si->msg = msg;
-- si->size = size;
-- si->flags = flags;
--
-- err = security_socket_recvmsg(sock, msg, size, flags);
-- if (err)
-- return err;
--
-- return sock->ops->recvmsg(iocb, sock, msg, size, flags);
--}
--
--int sock_recvmsg(struct socket *sock, struct msghdr *msg,
-- size_t size, int flags)
--{
-- struct kiocb iocb;
-- struct sock_iocb siocb;
-- int ret;
--
-- init_sync_kiocb(&iocb, NULL);
-- iocb.private = &siocb;
-- ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
-- if (-EIOCBQUEUED == ret)
-- ret = wait_on_sync_kiocb(&iocb);
-- return ret;
--}
--
--int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
-- struct kvec *vec, size_t num, size_t size, int flags)
--{
-- mm_segment_t oldfs = get_fs();
-- int result;
--
-- set_fs(KERNEL_DS);
-- /*
-- * the following is safe, since for compiler definitions of kvec and
-- * iovec are identical, yielding the same in-core layout and alignment
-- */
-- msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
-- result = sock_recvmsg(sock, msg, size, flags);
-- set_fs(oldfs);
-- return result;
--}
--
--static void sock_aio_dtor(struct kiocb *iocb)
--{
-- kfree(iocb->private);
--}
--
--static ssize_t sock_sendpage(struct file *file, struct page *page,
-- int offset, size_t size, loff_t *ppos, int more)
--{
-- struct socket *sock;
-- int flags;
--
-- sock = file->private_data;
--
-- flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
-- if (more)
-- flags |= MSG_MORE;
--
-- return sock->ops->sendpage(sock, page, offset, size, flags);
--}
--
--static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
-- struct sock_iocb *siocb)
--{
-- if (!is_sync_kiocb(iocb)) {
-- siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
-- if (!siocb)
-- return NULL;
-- iocb->ki_dtor = sock_aio_dtor;
-- }
--
-- siocb->kiocb = iocb;
-- iocb->private = siocb;
-- return siocb;
--}
--
--static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
-- struct file *file, const struct iovec *iov,
-- unsigned long nr_segs)
--{
-- struct socket *sock = file->private_data;
-- size_t size = 0;
-- int i;
--
-- for (i = 0; i < nr_segs; i++)
-- size += iov[i].iov_len;
--
-- msg->msg_name = NULL;
-- msg->msg_namelen = 0;
-- msg->msg_control = NULL;
-- msg->msg_controllen = 0;
-- msg->msg_iov = (struct iovec *)iov;
-- msg->msg_iovlen = nr_segs;
-- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
--
-- return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
--}
--
--static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
-- unsigned long nr_segs, loff_t pos)
--{
-- struct sock_iocb siocb, *x;
--
-- if (pos != 0)
-- return -ESPIPE;
--
-- if (iocb->ki_left == 0) /* Match SYS5 behaviour */
-- return 0;
--
--
-- x = alloc_sock_iocb(iocb, &siocb);
-- if (!x)
-- return -ENOMEM;
-- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
--}
--
--static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
-- struct file *file, const struct iovec *iov,
-- unsigned long nr_segs)
--{
-- struct socket *sock = file->private_data;
-- size_t size = 0;
-- int i;
--
-- for (i = 0; i < nr_segs; i++)
-- size += iov[i].iov_len;
--
-- msg->msg_name = NULL;
-- msg->msg_namelen = 0;
-- msg->msg_control = NULL;
-- msg->msg_controllen = 0;
-- msg->msg_iov = (struct iovec *)iov;
-- msg->msg_iovlen = nr_segs;
-- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
-- if (sock->type == SOCK_SEQPACKET)
-- msg->msg_flags |= MSG_EOR;
--
-- return __sock_sendmsg(iocb, sock, msg, size);
--}
--
--static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
-- unsigned long nr_segs, loff_t pos)
--{
-- struct sock_iocb siocb, *x;
--
-- if (pos != 0)
-- return -ESPIPE;
--
-- x = alloc_sock_iocb(iocb, &siocb);
-- if (!x)
-- return -ENOMEM;
--
-- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
--}
--
--/*
-- * Atomic setting of ioctl hooks to avoid race
-- * with module unload.
-- */
--
--static DEFINE_MUTEX(br_ioctl_mutex);
--static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
--
--void brioctl_set(int (*hook) (unsigned int, void __user *))
--{
-- mutex_lock(&br_ioctl_mutex);
-- br_ioctl_hook = hook;
-- mutex_unlock(&br_ioctl_mutex);
--}
--
--EXPORT_SYMBOL(brioctl_set);
--
--static DEFINE_MUTEX(vlan_ioctl_mutex);
--static int (*vlan_ioctl_hook) (void __user *arg);
--
--void vlan_ioctl_set(int (*hook) (void __user *))
--{
-- mutex_lock(&vlan_ioctl_mutex);
-- vlan_ioctl_hook = hook;
-- mutex_unlock(&vlan_ioctl_mutex);
--}
--
--EXPORT_SYMBOL(vlan_ioctl_set);
--
--static DEFINE_MUTEX(dlci_ioctl_mutex);
--static int (*dlci_ioctl_hook) (unsigned int, void __user *);
--
--void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
--{
-- mutex_lock(&dlci_ioctl_mutex);
-- dlci_ioctl_hook = hook;
-- mutex_unlock(&dlci_ioctl_mutex);
--}
--
--EXPORT_SYMBOL(dlci_ioctl_set);
--
--/*
-- * With an ioctl, arg may well be a user mode pointer, but we don't know
-- * what to do with it - that's up to the protocol still.
-- */
--
--static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
--{
-- struct socket *sock;
-- void __user *argp = (void __user *)arg;
-- int pid, err;
--
-- sock = file->private_data;
-- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
-- err = dev_ioctl(cmd, argp);
-- } else
--#ifdef CONFIG_WIRELESS_EXT
-- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
-- err = dev_ioctl(cmd, argp);
-- } else
--#endif /* CONFIG_WIRELESS_EXT */
-- switch (cmd) {
-- case FIOSETOWN:
-- case SIOCSPGRP:
-- err = -EFAULT;
-- if (get_user(pid, (int __user *)argp))
-- break;
-- err = f_setown(sock->file, pid, 1);
-- break;
-- case FIOGETOWN:
-- case SIOCGPGRP:
-- err = put_user(f_getown(sock->file),
-- (int __user *)argp);
-- break;
-- case SIOCGIFBR:
-- case SIOCSIFBR:
-- case SIOCBRADDBR:
-- case SIOCBRDELBR:
-- err = -ENOPKG;
-- if (!br_ioctl_hook)
-- request_module("bridge");
--
-- mutex_lock(&br_ioctl_mutex);
-- if (br_ioctl_hook)
-- err = br_ioctl_hook(cmd, argp);
-- mutex_unlock(&br_ioctl_mutex);
-- break;
-- case SIOCGIFVLAN:
-- case SIOCSIFVLAN:
-- err = -ENOPKG;
-- if (!vlan_ioctl_hook)
-- request_module("8021q");
--
-- mutex_lock(&vlan_ioctl_mutex);
-- if (vlan_ioctl_hook)
-- err = vlan_ioctl_hook(argp);
-- mutex_unlock(&vlan_ioctl_mutex);
-- break;
-- case SIOCADDDLCI:
-- case SIOCDELDLCI:
-- err = -ENOPKG;
-- if (!dlci_ioctl_hook)
-- request_module("dlci");
--
-- if (dlci_ioctl_hook) {
-- mutex_lock(&dlci_ioctl_mutex);
-- err = dlci_ioctl_hook(cmd, argp);
-- mutex_unlock(&dlci_ioctl_mutex);
-- }
-- break;
-- default:
-- err = sock->ops->ioctl(sock, cmd, arg);
--
-- /*
-- * If this ioctl is unknown try to hand it down
-- * to the NIC driver.
-- */
-- if (err == -ENOIOCTLCMD)
-- err = dev_ioctl(cmd, argp);
-- break;
-- }
-- return err;
--}
--
--int sock_create_lite(int family, int type, int protocol, struct socket **res)
--{
-- int err;
-- struct socket *sock = NULL;
--
-- err = security_socket_create(family, type, protocol, 1);
-- if (err)
-- goto out;
--
-- sock = sock_alloc();
-- if (!sock) {
-- err = -ENOMEM;
-- goto out;
-- }
--
-- sock->type = type;
-- err = security_socket_post_create(sock, family, type, protocol, 1);
-- if (err)
-- goto out_release;
--
--out:
-- *res = sock;
-- return err;
--out_release:
-- sock_release(sock);
-- sock = NULL;
-- goto out;
--}
--
--/* No kernel lock held - perfect */
--static unsigned int sock_poll(struct file *file, poll_table *wait)
--{
-- struct socket *sock;
--
-- /*
-- * We can't return errors to poll, so it's either yes or no.
-- */
-- sock = file->private_data;
-- return sock->ops->poll(file, sock, wait);
--}
--
--static int sock_mmap(struct file *file, struct vm_area_struct *vma)
--{
-- struct socket *sock = file->private_data;
--
-- return sock->ops->mmap(file, sock, vma);
--}
--
--static int sock_close(struct inode *inode, struct file *filp)
--{
-- /*
-- * It was possible the inode is NULL we were
-- * closing an unfinished socket.
-- */
--
-- if (!inode) {
-- printk(KERN_DEBUG "sock_close: NULL inode\n");
-- return 0;
-- }
-- sock_fasync(-1, filp, 0);
-- sock_release(SOCKET_I(inode));
-- return 0;
--}
--
--/*
-- * Update the socket async list
-- *
-- * Fasync_list locking strategy.
-- *
-- * 1. fasync_list is modified only under process context socket lock
-- * i.e. under semaphore.
-- * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
-- * or under socket lock.
-- * 3. fasync_list can be used from softirq context, so that
-- * modification under socket lock have to be enhanced with
-- * write_lock_bh(&sk->sk_callback_lock).
-- * --ANK (990710)
-- */
--
--static int sock_fasync(int fd, struct file *filp, int on)
--{
-- struct fasync_struct *fa, *fna = NULL, **prev;
-- struct socket *sock;
-- struct sock *sk;
--
-- if (on) {
-- fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
-- if (fna == NULL)
-- return -ENOMEM;
-- }
--
-- sock = filp->private_data;
--
-- sk = sock->sk;
-- if (sk == NULL) {
-- kfree(fna);
-- return -EINVAL;
-- }
--
-- lock_sock(sk);
--
-- prev = &(sock->fasync_list);
--
-- for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
-- if (fa->fa_file == filp)
-- break;
--
-- if (on) {
-- if (fa != NULL) {
-- write_lock_bh(&sk->sk_callback_lock);
-- fa->fa_fd = fd;
-- write_unlock_bh(&sk->sk_callback_lock);
--
-- kfree(fna);
-- goto out;
-- }
-- fna->fa_file = filp;
-- fna->fa_fd = fd;
-- fna->magic = FASYNC_MAGIC;
-- fna->fa_next = sock->fasync_list;
-- write_lock_bh(&sk->sk_callback_lock);
-- sock->fasync_list = fna;
-- write_unlock_bh(&sk->sk_callback_lock);
-- } else {
-- if (fa != NULL) {
-- write_lock_bh(&sk->sk_callback_lock);
-- *prev = fa->fa_next;
-- write_unlock_bh(&sk->sk_callback_lock);
-- kfree(fa);
-- }
-- }
--
--out:
-- release_sock(sock->sk);
-- return 0;
--}
--
--/* This function may be called only under socket lock or callback_lock */
--
--int sock_wake_async(struct socket *sock, int how, int band)
--{
-- if (!sock || !sock->fasync_list)
-- return -1;
-- switch (how) {
-- case 1:
--
-- if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
-- break;
-- goto call_kill;
-- case 2:
-- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
-- break;
-- /* fall through */
-- case 0:
--call_kill:
-- __kill_fasync(sock->fasync_list, SIGIO, band);
-- break;
-- case 3:
-- __kill_fasync(sock->fasync_list, SIGURG, band);
-- }
-- return 0;
--}
--
--static int __sock_create(int family, int type, int protocol,
-- struct socket **res, int kern)
--{
-- int err;
-- struct socket *sock;
-- const struct net_proto_family *pf;
--
-- /*
-- * Check protocol is in range
-- */
-- if (family < 0 || family >= NPROTO)
-- return -EAFNOSUPPORT;
-- if (type < 0 || type >= SOCK_MAX)
-- return -EINVAL;
--
-- /* Compatibility.
--
-- This uglymoron is moved from INET layer to here to avoid
-- deadlock in module load.
-- */
-- if (family == PF_INET && type == SOCK_PACKET) {
-- static int warned;
-- if (!warned) {
-- warned = 1;
-- printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
-- current->comm);
-- }
-- family = PF_PACKET;
-- }
--
-- err = security_socket_create(family, type, protocol, kern);
-- if (err)
-- return err;
--
-- /*
-- * Allocate the socket and allow the family to set things up. if
-- * the protocol is 0, the family is instructed to select an appropriate
-- * default.
-- */
-- sock = sock_alloc();
-- if (!sock) {
-- if (net_ratelimit())
-- printk(KERN_WARNING "socket: no more sockets\n");
-- return -ENFILE; /* Not exactly a match, but its the
-- closest posix thing */
-- }
--
-- sock->type = type;
--
--#if defined(CONFIG_KMOD)
-- /* Attempt to load a protocol module if the find failed.
-- *
-- * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
-- * requested real, full-featured networking support upon configuration.
-- * Otherwise module support will break!
-- */
-- if (net_families[family] == NULL)
-- request_module("net-pf-%d", family);
--#endif
--
-- rcu_read_lock();
-- pf = rcu_dereference(net_families[family]);
-- err = -EAFNOSUPPORT;
-- if (!pf)
-- goto out_release;
--
-- /*
-- * We will call the ->create function, that possibly is in a loadable
-- * module, so we have to bump that loadable module refcnt first.
-- */
-- if (!try_module_get(pf->owner))
-- goto out_release;
--
-- /* Now protected by module ref count */
-- rcu_read_unlock();
--
-- err = pf->create(sock, protocol);
-- if (err < 0)
-- goto out_module_put;
--
-- /*
-- * Now to bump the refcnt of the [loadable] module that owns this
-- * socket at sock_release time we decrement its refcnt.
-- */
-- if (!try_module_get(sock->ops->owner))
-- goto out_module_busy;
--
-- /*
-- * Now that we're done with the ->create function, the [loadable]
-- * module can have its refcnt decremented
-- */
-- module_put(pf->owner);
-- err = security_socket_post_create(sock, family, type, protocol, kern);
-- if (err)
-- goto out_sock_release;
-- *res = sock;
--
-- return 0;
--
--out_module_busy:
-- err = -EAFNOSUPPORT;
--out_module_put:
-- sock->ops = NULL;
-- module_put(pf->owner);
--out_sock_release:
-- sock_release(sock);
-- return err;
--
--out_release:
-- rcu_read_unlock();
-- goto out_sock_release;
--}
--
--int sock_create(int family, int type, int protocol, struct socket **res)
--{
-- return __sock_create(family, type, protocol, res, 0);
--}
--
--int sock_create_kern(int family, int type, int protocol, struct socket **res)
--{
-- return __sock_create(family, type, protocol, res, 1);
--}
--
--asmlinkage long sys_socket(int family, int type, int protocol)
--{
-- int retval;
-- struct socket *sock;
--
-- retval = sock_create(family, type, protocol, &sock);
-- if (retval < 0)
-- goto out;
--
-- retval = sock_map_fd(sock);
-- if (retval < 0)
-- goto out_release;
--
--out:
-- /* It may be already another descriptor 8) Not kernel problem. */
-- return retval;
--
--out_release:
-- sock_release(sock);
-- return retval;
--}
--
--/*
-- * Create a pair of connected sockets.
-- */
--
--asmlinkage long sys_socketpair(int family, int type, int protocol,
-- int __user *usockvec)
--{
-- struct socket *sock1, *sock2;
-- int fd1, fd2, err;
-- struct file *newfile1, *newfile2;
--
-- /*
-- * Obtain the first socket and check if the underlying protocol
-- * supports the socketpair call.
-- */
--
-- err = sock_create(family, type, protocol, &sock1);
-- if (err < 0)
-- goto out;
--
-- err = sock_create(family, type, protocol, &sock2);
-- if (err < 0)
-- goto out_release_1;
--
-- err = sock1->ops->socketpair(sock1, sock2);
-- if (err < 0)
-- goto out_release_both;
--
-- fd1 = sock_alloc_fd(&newfile1);
-- if (unlikely(fd1 < 0)) {
-- err = fd1;
-- goto out_release_both;
-- }
--
-- fd2 = sock_alloc_fd(&newfile2);
-- if (unlikely(fd2 < 0)) {
-- err = fd2;
-- put_filp(newfile1);
-- put_unused_fd(fd1);
-- goto out_release_both;
-- }
--
-- err = sock_attach_fd(sock1, newfile1);
-- if (unlikely(err < 0)) {
-- goto out_fd2;
-- }
--
-- err = sock_attach_fd(sock2, newfile2);
-- if (unlikely(err < 0)) {
-- fput(newfile1);
-- goto out_fd1;
-- }
--
-- err = audit_fd_pair(fd1, fd2);
-- if (err < 0) {
-- fput(newfile1);
-- fput(newfile2);
-- goto out_fd;
-- }
--
-- fd_install(fd1, newfile1);
-- fd_install(fd2, newfile2);
-- /* fd1 and fd2 may be already another descriptors.
-- * Not kernel problem.
-- */
--
-- err = put_user(fd1, &usockvec[0]);
-- if (!err)
-- err = put_user(fd2, &usockvec[1]);
-- if (!err)
-- return 0;
--
-- sys_close(fd2);
-- sys_close(fd1);
-- return err;
--
--out_release_both:
-- sock_release(sock2);
--out_release_1:
-- sock_release(sock1);
--out:
-- return err;
--
--out_fd2:
-- put_filp(newfile1);
-- sock_release(sock1);
--out_fd1:
-- put_filp(newfile2);
-- sock_release(sock2);
--out_fd:
-- put_unused_fd(fd1);
-- put_unused_fd(fd2);
-- goto out;
--}
--
--/*
-- * Bind a name to a socket. Nothing much to do here since it's
-- * the protocol's responsibility to handle the local address.
-- *
-- * We move the socket address to kernel space before we call
-- * the protocol layer (having also checked the address is ok).
-- */
--
--asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
--{
-- struct socket *sock;
-- char address[MAX_SOCK_ADDR];
-- int err, fput_needed;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (sock) {
-- err = move_addr_to_kernel(umyaddr, addrlen, address);
-- if (err >= 0) {
-- err = security_socket_bind(sock,
-- (struct sockaddr *)address,
-- addrlen);
-- if (!err)
-- err = sock->ops->bind(sock,
-- (struct sockaddr *)
-- address, addrlen);
-- }
-- fput_light(sock->file, fput_needed);
-- }
-- return err;
--}
--
--/*
-- * Perform a listen. Basically, we allow the protocol to do anything
-- * necessary for a listen, and if that works, we mark the socket as
-- * ready for listening.
-- */
--
--int sysctl_somaxconn __read_mostly = SOMAXCONN;
--
--asmlinkage long sys_listen(int fd, int backlog)
--{
-- struct socket *sock;
-- int err, fput_needed;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (sock) {
-- if ((unsigned)backlog > sysctl_somaxconn)
-- backlog = sysctl_somaxconn;
--
-- err = security_socket_listen(sock, backlog);
-- if (!err)
-- err = sock->ops->listen(sock, backlog);
--
-- fput_light(sock->file, fput_needed);
-- }
-- return err;
--}
--
--/*
-- * For accept, we attempt to create a new socket, set up the link
-- * with the client, wake up the client, then return the new
-- * connected fd. We collect the address of the connector in kernel
-- * space and move it to user at the very end. This is unclean because
-- * we open the socket then return an error.
-- *
-- * 1003.1g adds the ability to recvmsg() to query connection pending
-- * status to recvmsg. We need to add that support in a way thats
-- * clean when we restucture accept also.
-- */
--
--asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-- int __user *upeer_addrlen)
--{
-- struct socket *sock, *newsock;
-- struct file *newfile;
-- int err, len, newfd, fput_needed;
-- char address[MAX_SOCK_ADDR];
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (!sock)
-- goto out;
--
-- err = -ENFILE;
-- if (!(newsock = sock_alloc()))
-- goto out_put;
--
-- newsock->type = sock->type;
-- newsock->ops = sock->ops;
--
-- /*
-- * We don't need try_module_get here, as the listening socket (sock)
-- * has the protocol module (sock->ops->owner) held.
-- */
-- __module_get(newsock->ops->owner);
--
-- newfd = sock_alloc_fd(&newfile);
-- if (unlikely(newfd < 0)) {
-- err = newfd;
-- sock_release(newsock);
-- goto out_put;
-- }
--
-- err = sock_attach_fd(newsock, newfile);
-- if (err < 0)
-- goto out_fd_simple;
--
-- err = security_socket_accept(sock, newsock);
-- if (err)
-- goto out_fd;
--
-- err = sock->ops->accept(sock, newsock, sock->file->f_flags);
-- if (err < 0)
-- goto out_fd;
--
-- if (upeer_sockaddr) {
-- if (newsock->ops->getname(newsock, (struct sockaddr *)address,
-- &len, 2) < 0) {
-- err = -ECONNABORTED;
-- goto out_fd;
-- }
-- err = move_addr_to_user(address, len, upeer_sockaddr,
-- upeer_addrlen);
-- if (err < 0)
-- goto out_fd;
-- }
--
-- /* File flags are not inherited via accept() unlike another OSes. */
--
-- fd_install(newfd, newfile);
-- err = newfd;
--
-- security_socket_post_accept(sock, newsock);
--
--out_put:
-- fput_light(sock->file, fput_needed);
--out:
-- return err;
--out_fd_simple:
-- sock_release(newsock);
-- put_filp(newfile);
-- put_unused_fd(newfd);
-- goto out_put;
--out_fd:
-- fput(newfile);
-- put_unused_fd(newfd);
-- goto out_put;
--}
--
--/*
-- * Attempt to connect to a socket with the server address. The address
-- * is in user space so we verify it is OK and move it to kernel space.
-- *
-- * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
-- * break bindings
-- *
-- * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
-- * other SEQPACKET protocols that take time to connect() as it doesn't
-- * include the -EINPROGRESS status for such sockets.
-- */
--
--asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
-- int addrlen)
--{
-- struct socket *sock;
-- char address[MAX_SOCK_ADDR];
-- int err, fput_needed;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (!sock)
-- goto out;
-- err = move_addr_to_kernel(uservaddr, addrlen, address);
-- if (err < 0)
-- goto out_put;
--
-- err =
-- security_socket_connect(sock, (struct sockaddr *)address, addrlen);
-- if (err)
-- goto out_put;
--
-- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
-- sock->file->f_flags);
--out_put:
-- fput_light(sock->file, fput_needed);
--out:
-- return err;
--}
--
--/*
-- * Get the local address ('name') of a socket object. Move the obtained
-- * name to user space.
-- */
--
--asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
-- int __user *usockaddr_len)
--{
-- struct socket *sock;
-- char address[MAX_SOCK_ADDR];
-- int len, err, fput_needed;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (!sock)
-- goto out;
--
-- err = security_socket_getsockname(sock);
-- if (err)
-- goto out_put;
--
-- err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
-- if (err)
-- goto out_put;
-- err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
--
--out_put:
-- fput_light(sock->file, fput_needed);
--out:
-- return err;
--}
--
--/*
-- * Get the remote address ('name') of a socket object. Move the obtained
-- * name to user space.
-- */
--
--asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
-- int __user *usockaddr_len)
--{
-- struct socket *sock;
-- char address[MAX_SOCK_ADDR];
-- int len, err, fput_needed;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (sock != NULL) {
-- err = security_socket_getpeername(sock);
-- if (err) {
-- fput_light(sock->file, fput_needed);
-- return err;
-- }
--
-- err =
-- sock->ops->getname(sock, (struct sockaddr *)address, &len,
-- 1);
-- if (!err)
-- err = move_addr_to_user(address, len, usockaddr,
-- usockaddr_len);
-- fput_light(sock->file, fput_needed);
-- }
-- return err;
--}
--
--/*
-- * Send a datagram to a given address. We move the address into kernel
-- * space and check the user space data area is readable before invoking
-- * the protocol.
-- */
--
--asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
-- unsigned flags, struct sockaddr __user *addr,
-- int addr_len)
--{
-- struct socket *sock;
-- char address[MAX_SOCK_ADDR];
-- int err;
-- struct msghdr msg;
-- struct iovec iov;
-- int fput_needed;
-- struct file *sock_file;
--
-- sock_file = fget_light(fd, &fput_needed);
-- err = -EBADF;
-- if (!sock_file)
-- goto out;
--
-- sock = sock_from_file(sock_file, &err);
-- if (!sock)
-- goto out_put;
-- iov.iov_base = buff;
-- iov.iov_len = len;
-- msg.msg_name = NULL;
-- msg.msg_iov = &iov;
-- msg.msg_iovlen = 1;
-- msg.msg_control = NULL;
-- msg.msg_controllen = 0;
-- msg.msg_namelen = 0;
-- if (addr) {
-- err = move_addr_to_kernel(addr, addr_len, address);
-- if (err < 0)
-- goto out_put;
-- msg.msg_name = address;
-- msg.msg_namelen = addr_len;
-- }
-- if (sock->file->f_flags & O_NONBLOCK)
-- flags |= MSG_DONTWAIT;
-- msg.msg_flags = flags;
-- err = sock_sendmsg(sock, &msg, len);
--
--out_put:
-- fput_light(sock_file, fput_needed);
--out:
-- return err;
--}
--
--/*
-- * Send a datagram down a socket.
-- */
--
--asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
--{
-- return sys_sendto(fd, buff, len, flags, NULL, 0);
--}
--
--/*
-- * Receive a frame from the socket and optionally record the address of the
-- * sender. We verify the buffers are writable and if needed move the
-- * sender address from kernel to user space.
-- */
--
--asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
-- unsigned flags, struct sockaddr __user *addr,
-- int __user *addr_len)
--{
-- struct socket *sock;
-- struct iovec iov;
-- struct msghdr msg;
-- char address[MAX_SOCK_ADDR];
-- int err, err2;
-- struct file *sock_file;
-- int fput_needed;
--
-- sock_file = fget_light(fd, &fput_needed);
-- err = -EBADF;
-- if (!sock_file)
-- goto out;
--
-- sock = sock_from_file(sock_file, &err);
-- if (!sock)
-- goto out_put;
--
-- msg.msg_control = NULL;
-- msg.msg_controllen = 0;
-- msg.msg_iovlen = 1;
-- msg.msg_iov = &iov;
-- iov.iov_len = size;
-- iov.iov_base = ubuf;
-- msg.msg_name = address;
-- msg.msg_namelen = MAX_SOCK_ADDR;
-- if (sock->file->f_flags & O_NONBLOCK)
-- flags |= MSG_DONTWAIT;
-- err = sock_recvmsg(sock, &msg, size, flags);
--
-- if (err >= 0 && addr != NULL) {
-- err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
-- if (err2 < 0)
-- err = err2;
-- }
--out_put:
-- fput_light(sock_file, fput_needed);
--out:
-- return err;
--}
--
--/*
-- * Receive a datagram from a socket.
-- */
--
--asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
-- unsigned flags)
--{
-- return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
--}
--
--/*
-- * Set a socket option. Because we don't know the option lengths we have
-- * to pass the user mode parameter for the protocols to sort out.
-- */
--
--asmlinkage long sys_setsockopt(int fd, int level, int optname,
-- char __user *optval, int optlen)
--{
-- int err, fput_needed;
-- struct socket *sock;
--
-- if (optlen < 0)
-- return -EINVAL;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (sock != NULL) {
-- err = security_socket_setsockopt(sock, level, optname);
-- if (err)
-- goto out_put;
--
-- if (level == SOL_SOCKET)
-- err =
-- sock_setsockopt(sock, level, optname, optval,
-- optlen);
-- else
-- err =
-- sock->ops->setsockopt(sock, level, optname, optval,
-- optlen);
--out_put:
-- fput_light(sock->file, fput_needed);
-- }
-- return err;
--}
--
--/*
-- * Get a socket option. Because we don't know the option lengths we have
-- * to pass a user mode parameter for the protocols to sort out.
-- */
--
--asmlinkage long sys_getsockopt(int fd, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- int err, fput_needed;
-- struct socket *sock;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (sock != NULL) {
-- err = security_socket_getsockopt(sock, level, optname);
-- if (err)
-- goto out_put;
--
-- if (level == SOL_SOCKET)
-- err =
-- sock_getsockopt(sock, level, optname, optval,
-- optlen);
-- else
-- err =
-- sock->ops->getsockopt(sock, level, optname, optval,
-- optlen);
--out_put:
-- fput_light(sock->file, fput_needed);
-- }
-- return err;
--}
--
--/*
-- * Shutdown a socket.
-- */
--
--asmlinkage long sys_shutdown(int fd, int how)
--{
-- int err, fput_needed;
-- struct socket *sock;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (sock != NULL) {
-- err = security_socket_shutdown(sock, how);
-- if (!err)
-- err = sock->ops->shutdown(sock, how);
-- fput_light(sock->file, fput_needed);
-- }
-- return err;
--}
--
--/* A couple of helpful macros for getting the address of the 32/64 bit
-- * fields which are the same type (int / unsigned) on our platforms.
-- */
--#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
--#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
--#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
--
--/*
-- * BSD sendmsg interface
-- */
--
--asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
--{
-- struct compat_msghdr __user *msg_compat =
-- (struct compat_msghdr __user *)msg;
-- struct socket *sock;
-- char address[MAX_SOCK_ADDR];
-- struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
-- unsigned char ctl[sizeof(struct cmsghdr) + 20]
-- __attribute__ ((aligned(sizeof(__kernel_size_t))));
-- /* 20 is size of ipv6_pktinfo */
-- unsigned char *ctl_buf = ctl;
-- struct msghdr msg_sys;
-- int err, ctl_len, iov_size, total_len;
-- int fput_needed;
--
-- err = -EFAULT;
-- if (MSG_CMSG_COMPAT & flags) {
-- if (get_compat_msghdr(&msg_sys, msg_compat))
-- return -EFAULT;
-- }
-- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
-- return -EFAULT;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (!sock)
-- goto out;
--
-- /* do not move before msg_sys is valid */
-- err = -EMSGSIZE;
-- if (msg_sys.msg_iovlen > UIO_MAXIOV)
-- goto out_put;
--
-- /* Check whether to allocate the iovec area */
-- err = -ENOMEM;
-- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
-- if (msg_sys.msg_iovlen > UIO_FASTIOV) {
-- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
-- if (!iov)
-- goto out_put;
-- }
--
-- /* This will also move the address data into kernel space */
-- if (MSG_CMSG_COMPAT & flags) {
-- err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
-- } else
-- err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
-- if (err < 0)
-- goto out_freeiov;
-- total_len = err;
--
-- err = -ENOBUFS;
--
-- if (msg_sys.msg_controllen > INT_MAX)
-- goto out_freeiov;
-- ctl_len = msg_sys.msg_controllen;
-- if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
-- err =
-- cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
-- sizeof(ctl));
-- if (err)
-- goto out_freeiov;
-- ctl_buf = msg_sys.msg_control;
-- ctl_len = msg_sys.msg_controllen;
-- } else if (ctl_len) {
-- if (ctl_len > sizeof(ctl)) {
-- ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
-- if (ctl_buf == NULL)
-- goto out_freeiov;
-- }
-- err = -EFAULT;
-- /*
-- * Careful! Before this, msg_sys.msg_control contains a user pointer.
-- * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
-- * checking falls down on this.
-- */
-- if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
-- ctl_len))
-- goto out_freectl;
-- msg_sys.msg_control = ctl_buf;
-- }
-- msg_sys.msg_flags = flags;
--
-- if (sock->file->f_flags & O_NONBLOCK)
-- msg_sys.msg_flags |= MSG_DONTWAIT;
-- err = sock_sendmsg(sock, &msg_sys, total_len);
--
--out_freectl:
-- if (ctl_buf != ctl)
-- sock_kfree_s(sock->sk, ctl_buf, ctl_len);
--out_freeiov:
-- if (iov != iovstack)
-- sock_kfree_s(sock->sk, iov, iov_size);
--out_put:
-- fput_light(sock->file, fput_needed);
--out:
-- return err;
--}
--
--/*
-- * BSD recvmsg interface
-- */
--
--asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
-- unsigned int flags)
--{
-- struct compat_msghdr __user *msg_compat =
-- (struct compat_msghdr __user *)msg;
-- struct socket *sock;
-- struct iovec iovstack[UIO_FASTIOV];
-- struct iovec *iov = iovstack;
-- struct msghdr msg_sys;
-- unsigned long cmsg_ptr;
-- int err, iov_size, total_len, len;
-- int fput_needed;
--
-- /* kernel mode address */
-- char addr[MAX_SOCK_ADDR];
--
-- /* user mode address pointers */
-- struct sockaddr __user *uaddr;
-- int __user *uaddr_len;
--
-- if (MSG_CMSG_COMPAT & flags) {
-- if (get_compat_msghdr(&msg_sys, msg_compat))
-- return -EFAULT;
-- }
-- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
-- return -EFAULT;
--
-- sock = sockfd_lookup_light(fd, &err, &fput_needed);
-- if (!sock)
-- goto out;
--
-- err = -EMSGSIZE;
-- if (msg_sys.msg_iovlen > UIO_MAXIOV)
-- goto out_put;
--
-- /* Check whether to allocate the iovec area */
-- err = -ENOMEM;
-- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
-- if (msg_sys.msg_iovlen > UIO_FASTIOV) {
-- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
-- if (!iov)
-- goto out_put;
-- }
--
-- /*
-- * Save the user-mode address (verify_iovec will change the
-- * kernel msghdr to use the kernel address space)
-- */
--
-- uaddr = (void __user *)msg_sys.msg_name;
-- uaddr_len = COMPAT_NAMELEN(msg);
-- if (MSG_CMSG_COMPAT & flags) {
-- err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
-- } else
-- err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
-- if (err < 0)
-- goto out_freeiov;
-- total_len = err;
--
-- cmsg_ptr = (unsigned long)msg_sys.msg_control;
-- msg_sys.msg_flags = 0;
-- if (MSG_CMSG_COMPAT & flags)
-- msg_sys.msg_flags = MSG_CMSG_COMPAT;
--
-- if (sock->file->f_flags & O_NONBLOCK)
-- flags |= MSG_DONTWAIT;
-- err = sock_recvmsg(sock, &msg_sys, total_len, flags);
-- if (err < 0)
-- goto out_freeiov;
-- len = err;
--
-- if (uaddr != NULL) {
-- err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
-- uaddr_len);
-- if (err < 0)
-- goto out_freeiov;
-- }
-- err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
-- COMPAT_FLAGS(msg));
-- if (err)
-- goto out_freeiov;
-- if (MSG_CMSG_COMPAT & flags)
-- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
-- &msg_compat->msg_controllen);
-- else
-- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
-- &msg->msg_controllen);
-- if (err)
-- goto out_freeiov;
-- err = len;
--
--out_freeiov:
-- if (iov != iovstack)
-- sock_kfree_s(sock->sk, iov, iov_size);
--out_put:
-- fput_light(sock->file, fput_needed);
--out:
-- return err;
--}
--
--#ifdef __ARCH_WANT_SYS_SOCKETCALL
--
--/* Argument list sizes for sys_socketcall */
--#define AL(x) ((x) * sizeof(unsigned long))
--static const unsigned char nargs[18]={
-- AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-- AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-- AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
--};
--
--#undef AL
--
--/*
-- * System call vectors.
-- *
-- * Argument checking cleaned up. Saved 20% in size.
-- * This function doesn't need to set the kernel lock because
-- * it is set by the callees.
-- */
--
--asmlinkage long sys_socketcall(int call, unsigned long __user *args)
--{
-- unsigned long a[6];
-- unsigned long a0, a1;
-- int err;
--
-- if (call < 1 || call > SYS_RECVMSG)
-- return -EINVAL;
--
-- /* copy_from_user should be SMP safe. */
-- if (copy_from_user(a, args, nargs[call]))
-- return -EFAULT;
--
-- err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
-- if (err)
-- return err;
--
-- a0 = a[0];
-- a1 = a[1];
--
-- switch (call) {
-- case SYS_SOCKET:
-- err = sys_socket(a0, a1, a[2]);
-- break;
-- case SYS_BIND:
-- err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
-- break;
-- case SYS_CONNECT:
-- err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
-- break;
-- case SYS_LISTEN:
-- err = sys_listen(a0, a1);
-- break;
-- case SYS_ACCEPT:
-- err =
-- sys_accept(a0, (struct sockaddr __user *)a1,
-- (int __user *)a[2]);
-- break;
-- case SYS_GETSOCKNAME:
-- err =
-- sys_getsockname(a0, (struct sockaddr __user *)a1,
-- (int __user *)a[2]);
-- break;
-- case SYS_GETPEERNAME:
-- err =
-- sys_getpeername(a0, (struct sockaddr __user *)a1,
-- (int __user *)a[2]);
-- break;
-- case SYS_SOCKETPAIR:
-- err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
-- break;
-- case SYS_SEND:
-- err = sys_send(a0, (void __user *)a1, a[2], a[3]);
-- break;
-- case SYS_SENDTO:
-- err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
-- (struct sockaddr __user *)a[4], a[5]);
-- break;
-- case SYS_RECV:
-- err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
-- break;
-- case SYS_RECVFROM:
-- err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
-- (struct sockaddr __user *)a[4],
-- (int __user *)a[5]);
-- break;
-- case SYS_SHUTDOWN:
-- err = sys_shutdown(a0, a1);
-- break;
-- case SYS_SETSOCKOPT:
-- err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
-- break;
-- case SYS_GETSOCKOPT:
-- err =
-- sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
-- (int __user *)a[4]);
-- break;
-- case SYS_SENDMSG:
-- err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
-- break;
-- case SYS_RECVMSG:
-- err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
-- break;
-- default:
-- err = -EINVAL;
-- break;
-- }
-- return err;
--}
--
--#endif /* __ARCH_WANT_SYS_SOCKETCALL */
--
--/**
-- * sock_register - add a socket protocol handler
-- * @ops: description of protocol
-- *
-- * This function is called by a protocol handler that wants to
-- * advertise its address family, and have it linked into the
-- * socket interface. The value ops->family coresponds to the
-- * socket system call protocol family.
-- */
--int sock_register(const struct net_proto_family *ops)
--{
-- int err;
--
-- if (ops->family >= NPROTO) {
-- printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
-- NPROTO);
-- return -ENOBUFS;
-- }
--
-- spin_lock(&net_family_lock);
-- if (net_families[ops->family])
-- err = -EEXIST;
-- else {
-- net_families[ops->family] = ops;
-- err = 0;
-- }
-- spin_unlock(&net_family_lock);
--
-- printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
-- return err;
--}
--
--/**
-- * sock_unregister - remove a protocol handler
-- * @family: protocol family to remove
-- *
-- * This function is called by a protocol handler that wants to
-- * remove its address family, and have it unlinked from the
-- * new socket creation.
-- *
-- * If protocol handler is a module, then it can use module reference
-- * counts to protect against new references. If protocol handler is not
-- * a module then it needs to provide its own protection in
-- * the ops->create routine.
-- */
--void sock_unregister(int family)
--{
-- BUG_ON(family < 0 || family >= NPROTO);
--
-- spin_lock(&net_family_lock);
-- net_families[family] = NULL;
-- spin_unlock(&net_family_lock);
--
-- synchronize_rcu();
--
-- printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
--}
--
--static int __init sock_init(void)
--{
-- /*
-- * Initialize sock SLAB cache.
-- */
--
-- sk_init();
--
-- /*
-- * Initialize skbuff SLAB cache
-- */
-- skb_init();
--
-- /*
-- * Initialize the protocols module.
-- */
--
-- init_inodecache();
-- register_filesystem(&sock_fs_type);
-- sock_mnt = kern_mount(&sock_fs_type);
--
-- /* The real protocol initialization is performed in later initcalls.
-- */
--
--#ifdef CONFIG_NETFILTER
-- netfilter_init();
--#endif
--
-- return 0;
--}
--
--core_initcall(sock_init); /* early initcall */
--
--#ifdef CONFIG_PROC_FS
--void socket_seq_show(struct seq_file *seq)
--{
-- int cpu;
-- int counter = 0;
--
-- for_each_possible_cpu(cpu)
-- counter += per_cpu(sockets_in_use, cpu);
--
-- /* It can be negative, by the way. 8) */
-- if (counter < 0)
-- counter = 0;
--
-- seq_printf(seq, "sockets: used %d\n", counter);
--}
--#endif /* CONFIG_PROC_FS */
--
--#ifdef CONFIG_COMPAT
--static long compat_sock_ioctl(struct file *file, unsigned cmd,
-- unsigned long arg)
--{
-- struct socket *sock = file->private_data;
-- int ret = -ENOIOCTLCMD;
--
-- if (sock->ops->compat_ioctl)
-- ret = sock->ops->compat_ioctl(sock, cmd, arg);
--
-- return ret;
--}
--#endif
--
--int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
--{
-- return sock->ops->bind(sock, addr, addrlen);
--}
--
--int kernel_listen(struct socket *sock, int backlog)
--{
-- return sock->ops->listen(sock, backlog);
--}
--
--int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
--{
-- struct sock *sk = sock->sk;
-- int err;
--
-- err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
-- newsock);
-- if (err < 0)
-- goto done;
--
-- err = sock->ops->accept(sock, *newsock, flags);
-- if (err < 0) {
-- sock_release(*newsock);
-- goto done;
-- }
--
-- (*newsock)->ops = sock->ops;
--
--done:
-- return err;
--}
--
--int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
-- int flags)
--{
-- return sock->ops->connect(sock, addr, addrlen, flags);
--}
--
--int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-- int *addrlen)
--{
-- return sock->ops->getname(sock, addr, addrlen, 0);
--}
--
--int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-- int *addrlen)
--{
-- return sock->ops->getname(sock, addr, addrlen, 1);
--}
--
--int kernel_getsockopt(struct socket *sock, int level, int optname,
-- char *optval, int *optlen)
--{
-- mm_segment_t oldfs = get_fs();
-- int err;
--
-- set_fs(KERNEL_DS);
-- if (level == SOL_SOCKET)
-- err = sock_getsockopt(sock, level, optname, optval, optlen);
-- else
-- err = sock->ops->getsockopt(sock, level, optname, optval,
-- optlen);
-- set_fs(oldfs);
-- return err;
--}
--
--int kernel_setsockopt(struct socket *sock, int level, int optname,
-- char *optval, int optlen)
--{
-- mm_segment_t oldfs = get_fs();
-- int err;
--
-- set_fs(KERNEL_DS);
-- if (level == SOL_SOCKET)
-- err = sock_setsockopt(sock, level, optname, optval, optlen);
-- else
-- err = sock->ops->setsockopt(sock, level, optname, optval,
-- optlen);
-- set_fs(oldfs);
-- return err;
--}
--
--int kernel_sendpage(struct socket *sock, struct page *page, int offset,
-- size_t size, int flags)
--{
-- if (sock->ops->sendpage)
-- return sock->ops->sendpage(sock, page, offset, size, flags);
--
-- return sock_no_sendpage(sock, page, offset, size, flags);
--}
--
--int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
--{
-- mm_segment_t oldfs = get_fs();
-- int err;
--
-- set_fs(KERNEL_DS);
-- err = sock->ops->ioctl(sock, cmd, arg);
-- set_fs(oldfs);
--
-- return err;
--}
--
--/* ABI emulation layers need these two */
--EXPORT_SYMBOL(move_addr_to_kernel);
--EXPORT_SYMBOL(move_addr_to_user);
--EXPORT_SYMBOL(sock_create);
--EXPORT_SYMBOL(sock_create_kern);
--EXPORT_SYMBOL(sock_create_lite);
--EXPORT_SYMBOL(sock_map_fd);
--EXPORT_SYMBOL(sock_recvmsg);
--EXPORT_SYMBOL(sock_register);
--EXPORT_SYMBOL(sock_release);
--EXPORT_SYMBOL(sock_sendmsg);
--EXPORT_SYMBOL(sock_unregister);
--EXPORT_SYMBOL(sock_wake_async);
--EXPORT_SYMBOL(sockfd_lookup);
--EXPORT_SYMBOL(kernel_sendmsg);
--EXPORT_SYMBOL(kernel_recvmsg);
--EXPORT_SYMBOL(kernel_bind);
--EXPORT_SYMBOL(kernel_listen);
--EXPORT_SYMBOL(kernel_accept);
--EXPORT_SYMBOL(kernel_connect);
--EXPORT_SYMBOL(kernel_getsockname);
--EXPORT_SYMBOL(kernel_getpeername);
--EXPORT_SYMBOL(kernel_getsockopt);
--EXPORT_SYMBOL(kernel_setsockopt);
--EXPORT_SYMBOL(kernel_sendpage);
--EXPORT_SYMBOL(kernel_sock_ioctl);
diff -Nurb linux-2.6.22-570/net/sunrpc/auth.c linux-2.6.22-590/net/sunrpc/auth.c
--- linux-2.6.22-570/net/sunrpc/auth.c 2008-03-20 13:25:46.000000000 -0400
+++ linux-2.6.22-590/net/sunrpc/auth.c 2008-03-20 13:28:08.000000000 -0400
+vi -o ./net/bridge/br_if.c ./net/bridge/br_if.c.rej
+vi -o ./net/sunrpc/auth_unix.c ./net/sunrpc/auth_unix.c.rej
+vi -o ./scripts/checksyscalls.sh ./scripts/checksyscalls.sh.rej
-diff -Nurb linux-2.6.22-570/scripts/Makefile.build.orig linux-2.6.22-590/scripts/Makefile.build.orig
---- linux-2.6.22-570/scripts/Makefile.build.orig 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/scripts/Makefile.build.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,348 +0,0 @@
--# ==========================================================================
--# Building
--# ==========================================================================
--
--src := $(obj)
--
--PHONY := __build
--__build:
--
--# Read .config if it exist, otherwise ignore
---include include/config/auto.conf
--
--include scripts/Kbuild.include
--
--# The filename Kbuild has precedence over Makefile
--kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
--include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile)
--
--include scripts/Makefile.lib
--
--ifdef host-progs
--ifneq ($(hostprogs-y),$(host-progs))
--$(warning kbuild: $(obj)/Makefile - Usage of host-progs is deprecated. Please replace with hostprogs-y!)
--hostprogs-y += $(host-progs)
--endif
--endif
--
--# Do not include host rules unles needed
--ifneq ($(hostprogs-y)$(hostprogs-m),)
--include scripts/Makefile.host
--endif
--
--ifneq ($(KBUILD_SRC),)
--# Create output directory if not already present
--_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj))
--
--# Create directories for object files if directory does not exist
--# Needed when obj-y := dir/file.o syntax is used
--_dummy := $(foreach d,$(obj-dirs), $(shell [ -d $(d) ] || mkdir -p $(d)))
--endif
--
--
--ifdef EXTRA_TARGETS
--$(warning kbuild: $(obj)/Makefile - Usage of EXTRA_TARGETS is obsolete in 2.6. Please fix!)
--endif
--
--ifdef build-targets
--$(warning kbuild: $(obj)/Makefile - Usage of build-targets is obsolete in 2.6. Please fix!)
--endif
--
--ifdef export-objs
--$(warning kbuild: $(obj)/Makefile - Usage of export-objs is obsolete in 2.6. Please fix!)
--endif
--
--ifdef O_TARGET
--$(warning kbuild: $(obj)/Makefile - Usage of O_TARGET := $(O_TARGET) is obsolete in 2.6. Please fix!)
--endif
--
--ifdef L_TARGET
--$(error kbuild: $(obj)/Makefile - Use of L_TARGET is replaced by lib-y in 2.6. Please fix!)
--endif
--
--ifdef list-multi
--$(warning kbuild: $(obj)/Makefile - list-multi := $(list-multi) is obsolete in 2.6. Please fix!)
--endif
--
--ifndef obj
--$(warning kbuild: Makefile.build is included improperly)
--endif
--
--# ===========================================================================
--
--ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),)
--lib-target := $(obj)/lib.a
--endif
--
--ifneq ($(strip $(obj-y) $(obj-m) $(obj-n) $(obj-) $(lib-target)),)
--builtin-target := $(obj)/built-in.o
--endif
--
--# We keep a list of all modules in $(MODVERDIR)
--
--__build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \
-- $(if $(KBUILD_MODULES),$(obj-m)) \
-- $(subdir-ym) $(always)
-- @:
--
--# Linus' kernel sanity checking tool
--ifneq ($(KBUILD_CHECKSRC),0)
-- ifeq ($(KBUILD_CHECKSRC),2)
-- quiet_cmd_force_checksrc = CHECK $<
-- cmd_force_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ;
-- else
-- quiet_cmd_checksrc = CHECK $<
-- cmd_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ;
-- endif
--endif
--
--
--# Compile C sources (.c)
--# ---------------------------------------------------------------------------
--
--# Default is built-in, unless we know otherwise
--modkern_cflags := $(CFLAGS_KERNEL)
--quiet_modtag := $(empty) $(empty)
--
--$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE)
--$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE)
--$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE)
--$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE)
--
--$(real-objs-m) : quiet_modtag := [M]
--$(real-objs-m:.o=.i) : quiet_modtag := [M]
--$(real-objs-m:.o=.s) : quiet_modtag := [M]
--$(real-objs-m:.o=.lst): quiet_modtag := [M]
--
--$(obj-m) : quiet_modtag := [M]
--
--# Default for not multi-part modules
--modname = $(basetarget)
--
--$(multi-objs-m) : modname = $(modname-multi)
--$(multi-objs-m:.o=.i) : modname = $(modname-multi)
--$(multi-objs-m:.o=.s) : modname = $(modname-multi)
--$(multi-objs-m:.o=.lst) : modname = $(modname-multi)
--$(multi-objs-y) : modname = $(modname-multi)
--$(multi-objs-y:.o=.i) : modname = $(modname-multi)
--$(multi-objs-y:.o=.s) : modname = $(modname-multi)
--$(multi-objs-y:.o=.lst) : modname = $(modname-multi)
--
--quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
--cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $<
--
--$(obj)/%.s: $(src)/%.c FORCE
-- $(call if_changed_dep,cc_s_c)
--
--quiet_cmd_cc_i_c = CPP $(quiet_modtag) $@
--cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $<
--
--$(obj)/%.i: $(src)/%.c FORCE
-- $(call if_changed_dep,cc_i_c)
--
--quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@
--cmd_cc_symtypes_c = \
-- $(CPP) -D__GENKSYMS__ $(c_flags) $< \
-- | $(GENKSYMS) -T $@ >/dev/null; \
-- test -s $@ || rm -f $@
--
--$(obj)/%.symtypes : $(src)/%.c FORCE
-- $(call if_changed_dep,cc_symtypes_c)
--
--# C (.c) files
--# The C file is compiled and updated dependency information is generated.
--# (See cmd_cc_o_c + relevant part of rule_cc_o_c)
--
--quiet_cmd_cc_o_c = CC $(quiet_modtag) $@
--
--ifndef CONFIG_MODVERSIONS
--cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
--
--else
--# When module versioning is enabled the following steps are executed:
--# o compile a .tmp_<file>.o from <file>.c
--# o if .tmp_<file>.o doesn't contain a __ksymtab version, i.e. does
--# not export symbols, we just rename .tmp_<file>.o to <file>.o and
--# are done.
--# o otherwise, we calculate symbol versions using the good old
--# genksyms on the preprocessed source and postprocess them in a way
--# that they are usable as a linker script
--# o generate <file>.o from .tmp_<file>.o using the linker to
--# replace the unresolved symbols __crc_exported_symbol with
--# the actual value of the checksum generated by genksyms
--
--cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
--cmd_modversions = \
-- if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
-- $(CPP) -D__GENKSYMS__ $(c_flags) $< \
-- | $(GENKSYMS) $(if $(KBUILD_SYMTYPES), \
-- -T $(@D)/$(@F:.o=.symtypes)) -a $(ARCH) \
-- > $(@D)/.tmp_$(@F:.o=.ver); \
-- \
-- $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \
-- -T $(@D)/.tmp_$(@F:.o=.ver); \
-- rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \
-- else \
-- mv -f $(@D)/.tmp_$(@F) $@; \
-- fi;
--endif
--
--define rule_cc_o_c
-- $(call echo-cmd,checksrc) $(cmd_checksrc) \
-- $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \
-- $(cmd_modversions) \
-- scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \
-- $(dot-target).tmp; \
-- rm -f $(depfile); \
-- mv -f $(dot-target).tmp $(dot-target).cmd
--endef
--
--# Built-in and composite module parts
--$(obj)/%.o: $(src)/%.c FORCE
-- $(call cmd,force_checksrc)
-- $(call if_changed_rule,cc_o_c)
--
--# Single-part modules are special since we need to mark them in $(MODVERDIR)
--
--$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE
-- $(call cmd,force_checksrc)
-- $(call if_changed_rule,cc_o_c)
-- @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod)
--
--quiet_cmd_cc_lst_c = MKLST $@
-- cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \
-- $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \
-- System.map $(OBJDUMP) > $@
--
--$(obj)/%.lst: $(src)/%.c FORCE
-- $(call if_changed_dep,cc_lst_c)
--
--# Compile assembler sources (.S)
--# ---------------------------------------------------------------------------
--
--modkern_aflags := $(AFLAGS_KERNEL)
--
--$(real-objs-m) : modkern_aflags := $(AFLAGS_MODULE)
--$(real-objs-m:.o=.s): modkern_aflags := $(AFLAGS_MODULE)
--
--quiet_cmd_as_s_S = CPP $(quiet_modtag) $@
--cmd_as_s_S = $(CPP) $(a_flags) -o $@ $<
--
--$(obj)/%.s: $(src)/%.S FORCE
-- $(call if_changed_dep,as_s_S)
--
--quiet_cmd_as_o_S = AS $(quiet_modtag) $@
--cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
--
--$(obj)/%.o: $(src)/%.S FORCE
-- $(call if_changed_dep,as_o_S)
--
--targets += $(real-objs-y) $(real-objs-m) $(lib-y)
--targets += $(extra-y) $(MAKECMDGOALS) $(always)
--
--# Linker scripts preprocessor (.lds.S -> .lds)
--# ---------------------------------------------------------------------------
--quiet_cmd_cpp_lds_S = LDS $@
-- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $<
--
--$(obj)/%.lds: $(src)/%.lds.S FORCE
-- $(call if_changed_dep,cpp_lds_S)
--
--# Build the compiled-in targets
--# ---------------------------------------------------------------------------
--
--# To build objects in subdirs, we need to descend into the directories
--$(sort $(subdir-obj-y)): $(subdir-ym) ;
--
--#
--# Rule to compile a set of .o files into one .o file
--#
--ifdef builtin-target
--quiet_cmd_link_o_target = LD $@
--# If the list of objects to link is empty, just create an empty built-in.o
--cmd_link_o_target = $(if $(strip $(obj-y)),\
-- $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^),\
-- rm -f $@; $(AR) rcs $@)
--
--$(builtin-target): $(obj-y) FORCE
-- $(call if_changed,link_o_target)
--
--targets += $(builtin-target)
--endif # builtin-target
--
--#
--# Rule to compile a set of .o files into one .a file
--#
--ifdef lib-target
--quiet_cmd_link_l_target = AR $@
--cmd_link_l_target = rm -f $@; $(AR) $(EXTRA_ARFLAGS) rcs $@ $(lib-y)
--
--$(lib-target): $(lib-y) FORCE
-- $(call if_changed,link_l_target)
--
--targets += $(lib-target)
--endif
--
--#
--# Rule to link composite objects
--#
--# Composite objects are specified in kbuild makefile as follows:
--# <composite-object>-objs := <list of .o files>
--# or
--# <composite-object>-y := <list of .o files>
--link_multi_deps = \
--$(filter $(addprefix $(obj)/, \
--$($(subst $(obj)/,,$(@:.o=-objs))) \
--$($(subst $(obj)/,,$(@:.o=-y)))), $^)
--
--quiet_cmd_link_multi-y = LD $@
--cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps)
--
--quiet_cmd_link_multi-m = LD [M] $@
--cmd_link_multi-m = $(LD) $(ld_flags) $(LDFLAGS_MODULE) -o $@ $(link_multi_deps)
--
--# We would rather have a list of rules like
--# foo.o: $(foo-objs)
--# but that's not so easy, so we rather make all composite objects depend
--# on the set of all their parts
--$(multi-used-y) : %.o: $(multi-objs-y) FORCE
-- $(call if_changed,link_multi-y)
--
--$(multi-used-m) : %.o: $(multi-objs-m) FORCE
-- $(call if_changed,link_multi-m)
-- @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod)
--
--targets += $(multi-used-y) $(multi-used-m)
--
--
--# Descending
--# ---------------------------------------------------------------------------
--
--PHONY += $(subdir-ym)
--$(subdir-ym):
-- $(Q)$(MAKE) $(build)=$@
--
--# Add FORCE to the prequisites of a target to force it to be always rebuilt.
--# ---------------------------------------------------------------------------
--
--PHONY += FORCE
--
--FORCE:
--
--# Read all saved command lines and dependencies for the $(targets) we
--# may be building above, using $(if_changed{,_dep}). As an
--# optimization, we don't need to read them if the target does not
--# exist, we will rebuild anyway in that case.
--
--targets := $(wildcard $(sort $(targets)))
--cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
--
--ifneq ($(cmd_files),)
-- include $(cmd_files)
--endif
--
--
--# Declare the contents of the .PHONY variable as phony. We keep that
--# information in a variable se we can use it in if_changed and friends.
--
--.PHONY: $(PHONY)
-diff -Nurb linux-2.6.22-570/scripts/Makefile.modpost.orig linux-2.6.22-590/scripts/Makefile.modpost.orig
---- linux-2.6.22-570/scripts/Makefile.modpost.orig 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/scripts/Makefile.modpost.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,132 +0,0 @@
--# ===========================================================================
--# Module versions
--# ===========================================================================
--#
--# Stage one of module building created the following:
--# a) The individual .o files used for the module
--# b) A <module>.o file which is the .o files above linked together
--# c) A <module>.mod file in $(MODVERDIR)/, listing the name of the
--# the preliminary <module>.o file, plus all .o files
--
--# Stage 2 is handled by this file and does the following
--# 1) Find all modules from the files listed in $(MODVERDIR)/
--# 2) modpost is then used to
--# 3) create one <module>.mod.c file pr. module
--# 4) create one Module.symvers file with CRC for all exported symbols
--# 5) compile all <module>.mod.c files
--# 6) final link of the module to a <module.ko> file
--
--# Step 3 is used to place certain information in the module's ELF
--# section, including information such as:
--# Version magic (see include/vermagic.h for full details)
--# - Kernel release
--# - SMP is CONFIG_SMP
--# - PREEMPT is CONFIG_PREEMPT
--# - GCC Version
--# Module info
--# - Module version (MODULE_VERSION)
--# - Module alias'es (MODULE_ALIAS)
--# - Module license (MODULE_LICENSE)
--# - See include/linux/module.h for more details
--
--# Step 4 is solely used to allow module versioning in external modules,
--# where the CRC of each module is retrieved from the Module.symers file.
--
--# KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined
--# symbols in the final module linking stage
--# KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules.
--# This is solely usefull to speed up test compiles
--PHONY := _modpost
--_modpost: __modpost
--
--include include/config/auto.conf
--include scripts/Kbuild.include
--include scripts/Makefile.lib
--
--kernelsymfile := $(objtree)/Module.symvers
--modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
--
--# Step 1), find all modules listed in $(MODVERDIR)/
--__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
--modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
--
--# Stop after building .o files if NOFINAL is set. Makes compile tests quicker
--_modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules))
--
--
--# Step 2), invoke modpost
--# Includes step 3,4
--quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
-- cmd_modpost = scripts/mod/modpost \
-- $(if $(CONFIG_MODVERSIONS),-m) \
-- $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \
-- $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \
-- $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \
-- $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
-- $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
--
--PHONY += __modpost
--__modpost: $(modules:.ko=.o) FORCE
-- $(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^)
--
--quiet_cmd_kernel-mod = MODPOST $@
-- cmd_kernel-mod = $(cmd_modpost) $(KBUILD_VMLINUX_OBJS)
--
--PHONY += vmlinux
--vmlinux: FORCE
-- $(call cmd,kernel-mod)
--
--# Declare generated files as targets for modpost
--$(symverfile): __modpost ;
--$(modules:.ko=.mod.c): __modpost ;
--
--
--# Step 5), compile all *.mod.c files
--
--# modname is set to make c_flags define KBUILD_MODNAME
--modname = $(notdir $(@:.mod.o=))
--
--quiet_cmd_cc_o_c = CC $@
-- cmd_cc_o_c = $(CC) $(c_flags) $(CFLAGS_MODULE) \
-- -c -o $@ $<
--
--$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
-- $(call if_changed_dep,cc_o_c)
--
--targets += $(modules:.ko=.mod.o)
--
--# Step 6), final link of the modules
--quiet_cmd_ld_ko_o = LD [M] $@
-- cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_MODULE) -o $@ \
-- $(filter-out FORCE,$^)
--
--$(modules): %.ko :%.o %.mod.o FORCE
-- $(call if_changed,ld_ko_o)
--
--targets += $(modules)
--
--
--# Add FORCE to the prequisites of a target to force it to be always rebuilt.
--# ---------------------------------------------------------------------------
--
--PHONY += FORCE
--
--FORCE:
--
--# Read all saved command lines and dependencies for the $(targets) we
--# may be building above, using $(if_changed{,_dep}). As an
--# optimization, we don't need to read them if the target does not
--# exist, we will rebuild anyway in that case.
--
--targets := $(wildcard $(sort $(targets)))
--cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
--
--ifneq ($(cmd_files),)
-- include $(cmd_files)
--endif
--
--
--# Declare the contents of the .PHONY variable as phony. We keep that
--# information in a variable se we can use it in if_changed and friends.
--
--.PHONY: $(PHONY)
diff -Nurb linux-2.6.22-570/security/commoncap.c linux-2.6.22-590/security/commoncap.c
--- linux-2.6.22-570/security/commoncap.c 2008-03-20 13:25:46.000000000 -0400
+++ linux-2.6.22-590/security/commoncap.c 2008-03-20 13:28:08.000000000 -0400